diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c1f7ecd82..099f0aebe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -71,13 +71,11 @@ jobs: restore-keys: 'pnpm-home-${{ runner.os }}-${{ runner.arch }}-' - name: Resolve devenv run: | - if [ -z "${DEVENV_REV:-}" ]; then - DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) + DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) if [ -z "$DEVENV_REV" ] || [ "$DEVENV_REV" = "null" ]; then echo '::error::devenv.lock missing .nodes.devenv.locked.rev' exit 1 fi - fi resolve_devenv() { nix build --no-link --print-out-paths "github:cachix/devenv/$DEVENV_REV#devenv" @@ -151,6 +149,110 @@ jobs: EOF echo "::warning::Intentional failure for diagnostics validation (#272)" exit 1 + - name: Verify OTEL shell entry + shell: bash + run: | + __nix_gc_retry() { + local __task='devenv tasks run otel:test --mode before' __max=${NIX_GC_RACE_MAX_RETRIES:-10} __heartbeat=${CI_PROGRESS_HEARTBEAT_SECONDS:-60} __n=1 __log __rc __path __start __now __elapsed __hb_pid __flattened __saw_invalid_path __saw_cachix_signature + __start=$(date +%s) + + __write_summary() { + [ -n "${GITHUB_STEP_SUMMARY:-}" ] || return 0 + { + echo "### CI Task" + # Keep summary values plain text. Backticks inside double quotes trigger + # shell command substitution and turned failed-task metadata into bogus + # commands on GitHub Actions runners. + echo "- Task: $__task" + echo "- Status: $1" + echo "- Duration: $__elapsed s" + echo "- Attempts: $__n/$__max" + [ -z "${2:-}" ] || echo "- Note: $2" + } >> "$GITHUB_STEP_SUMMARY" + } + + while [ "$__n" -le "$__max" ]; do + echo "::notice::[ci] starting $__task (attempt $__n/$__max)" + ( + while sleep "$__heartbeat"; do + __now=$(date +%s) + __elapsed=$((__now - __start)) + echo "::notice::[ci] $__task still running after $__elapsed s (attempt $__n/$__max)" + done + ) & + __hb_pid=$! 
+ + __log=$(mktemp) + set +e + eval "$1" > >(tee -a "$__log") 2> >(tee -a "$__log" >&2) + __rc=$? + set -e + + kill "$__hb_pid" 2>/dev/null || true + wait "$__hb_pid" 2>/dev/null || true + + __now=$(date +%s) + __elapsed=$((__now - __start)) + + [ $__rc -eq 0 ] && { + echo "::notice::[ci] completed $__task in $__elapsed s" + if [ "$__n" -gt 1 ]; then + __write_summary success "Recovered from Nix GC race after retry" + else + __write_summary success + fi + rm -f "$__log" + return 0 + } + + __flattened=$(tr ' + ' ' ' < "$__log" | sed "s/$(printf '\033')\[[0-9;]*m//g") + __path=$(printf '%s' "$__flattened" | sed -n "s#.*error:[[:space:]]*path '\\(/nix/store/[^']*\\)'[[:space:]]*is not valid.*#\\1#p" | head -1 | tr -d '[:space:]' || true) + __saw_invalid_path=false + __saw_cachix_signature=false + [ -n "$__path" ] && __saw_invalid_path=true + printf '%s' "$__flattened" | grep -q 'Failed to convert config\.cachix to JSON' && __saw_cachix_signature=true || true + # Match the semantic signal, not the exact quote punctuation, so the shell + # stays valid even when the human-facing error wraps the option name. + printf '%s' "$__flattened" | grep -q 'while evaluating the option' && printf '%s' "$__flattened" | grep -q 'cachix\.package' && __saw_cachix_signature=true || true + rm -f "$__log" + if [ "$__saw_invalid_path" != true ] && [ "$__saw_cachix_signature" != true ]; then + echo "::warning::[ci] $__task failed after $__elapsed s without a detected Nix store validity race" + __write_summary failure "No Nix GC race signature detected" + return $__rc + fi + if [ "$__saw_cachix_signature" = true ] && [ -n "$__path" ]; then + echo "::warning::Nix store validity race detected for $__task via cachix eval wrapper (attempt $__n/$__max): $__path" + elif [ "$__saw_cachix_signature" = true ]; then + # The cachix wrapper can surface the GC race before the invalid path makes + # it into the flattened log. Retrying after clearing the eval cache still + # recovers that case in practice. 
+ echo "::warning::Nix store validity race detected for $__task via cachix eval wrapper without extracted store path (attempt $__n/$__max)" + else + echo "::warning::Nix store validity race detected for $__task (attempt $__n/$__max): $__path" + fi + [ -z "$__path" ] || nix-store --realise "$__path" 2>/dev/null || true + rm -rf ~/.cache/nix/eval-cache-* + __n=$((__n + 1)) + done + + __now=$(date +%s) + __elapsed=$((__now - __start)) + echo "::error::Nix GC race retry exhausted for $__task ($__max attempts)" + __write_summary failure "Nix GC race retry exhausted" + return 1 + }; __nix_gc_retry 'if [ -n "${NIX_CONFIG:-}" ]; then NIX_CONFIG_WITH_APPEND=$(printf '"'"'%s\n%s'"'"' "$NIX_CONFIG" '"'"'restrict-eval = false'"'"'); else NIX_CONFIG_WITH_APPEND='"'"'restrict-eval = false'"'"'; fi; NIX_CONFIG="$NIX_CONFIG_WITH_APPEND" PNPM_HOME="${PNPM_HOME:-${{ github.workspace }}/.pnpm-home}" PNPM_STORE_DIR="${PNPM_STORE_DIR:-${{ runner.temp }}/pnpm-store/${{ github.job }}}" DT_PASSTHROUGH=1 "${DEVENV_BIN:?DEVENV_BIN not set}" tasks run otel:test --mode before' + command -v script >/dev/null 2>&1 + tmp_log="$(mktemp)" + printf 'printf "OTEL_MODE=%%s\n" "$OTEL_MODE" + printf "OTEL_GRAFANA_LINK_URL=%%s\n" "$OTEL_GRAFANA_LINK_URL" + exit + ' | script -qefc '"${DEVENV_BIN:?DEVENV_BIN not set}" shell --no-reload' "$tmp_log" + grep -q '\[otel\] Using .* OTEL stack' "$tmp_log" + grep -q '\[otel\] Start with: devenv up' "$tmp_log" + grep -q '^OTEL_MODE=' "$tmp_log" + grep -q '^OTEL_GRAFANA_LINK_URL=http' "$tmp_log" + rm -f "$tmp_log" - name: Type check run: | __nix_gc_retry() { @@ -349,13 +451,11 @@ jobs: restore-keys: 'pnpm-home-${{ runner.os }}-${{ runner.arch }}-' - name: Resolve devenv run: | - if [ -z "${DEVENV_REV:-}" ]; then - DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) + DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) if [ -z "$DEVENV_REV" ] || [ "$DEVENV_REV" = "null" ]; then echo '::error::devenv.lock missing .nodes.devenv.locked.rev' exit 1 fi - fi 
resolve_devenv() { nix build --no-link --print-out-paths "github:cachix/devenv/$DEVENV_REV#devenv" @@ -630,13 +730,11 @@ jobs: restore-keys: 'pnpm-home-${{ runner.os }}-${{ runner.arch }}-' - name: Resolve devenv run: | - if [ -z "${DEVENV_REV:-}" ]; then - DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) + DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) if [ -z "$DEVENV_REV" ] || [ "$DEVENV_REV" = "null" ]; then echo '::error::devenv.lock missing .nodes.devenv.locked.rev' exit 1 fi - fi resolve_devenv() { nix build --no-link --print-out-paths "github:cachix/devenv/$DEVENV_REV#devenv" @@ -911,13 +1009,11 @@ jobs: restore-keys: 'pnpm-home-${{ runner.os }}-${{ runner.arch }}-' - name: Resolve devenv run: | - if [ -z "${DEVENV_REV:-}" ]; then - DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) + DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) if [ -z "$DEVENV_REV" ] || [ "$DEVENV_REV" = "null" ]; then echo '::error::devenv.lock missing .nodes.devenv.locked.rev' exit 1 fi - fi resolve_devenv() { nix build --no-link --print-out-paths "github:cachix/devenv/$DEVENV_REV#devenv" @@ -1170,13 +1266,11 @@ jobs: authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}' - name: Resolve devenv run: | - if [ -z "${DEVENV_REV:-}" ]; then - DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) + DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) if [ -z "$DEVENV_REV" ] || [ "$DEVENV_REV" = "null" ]; then echo '::error::devenv.lock missing .nodes.devenv.locked.rev' exit 1 fi - fi resolve_devenv() { nix build --no-link --print-out-paths "github:cachix/devenv/$DEVENV_REV#devenv" @@ -1335,13 +1429,11 @@ jobs: restore-keys: 'pnpm-home-${{ runner.os }}-${{ runner.arch }}-' - name: Resolve devenv run: | - if [ -z "${DEVENV_REV:-}" ]; then - DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) + DEVENV_REV=$(jq -r .nodes.devenv.locked.rev devenv.lock) if [ -z "$DEVENV_REV" ] || [ "$DEVENV_REV" = "null" ]; then echo '::error::devenv.lock missing 
.nodes.devenv.locked.rev' exit 1 fi - fi resolve_devenv() { nix build --no-link --print-out-paths "github:cachix/devenv/$DEVENV_REV#devenv" diff --git a/.github/workflows/ci.yml.genie.ts b/.github/workflows/ci.yml.genie.ts index 676928280..1b11db633 100644 --- a/.github/workflows/ci.yml.genie.ts +++ b/.github/workflows/ci.yml.genie.ts @@ -68,6 +68,28 @@ const failureReminderStep = { ].join('\n'), } as const +/** + * Verify the lock-pinned devenv rev emits OTEL shell-entry messages under a real PTY. + * `--no-reload` keeps the probe on the post-init shell-output path we care about + * without exercising the separate interactive reload loop, which currently + * panics on the pinned upstream commit. + */ +const verifyOtelShellEntryStep = { + name: 'Verify OTEL shell entry', + shell: 'bash' as const, + run: [ + runDevenvTasksBefore('otel:test'), + 'command -v script >/dev/null 2>&1', + 'tmp_log="$(mktemp)"', + `printf 'printf "OTEL_MODE=%%s\\n" "$OTEL_MODE"\nprintf "OTEL_GRAFANA_LINK_URL=%%s\\n" "$OTEL_GRAFANA_LINK_URL"\nexit\n' | script -qefc '"${'${DEVENV_BIN:?DEVENV_BIN not set}'}" shell --no-reload' "$tmp_log"`, + "grep -q '\\[otel\\] Using .* OTEL stack' \"$tmp_log\"", + "grep -q '\\[otel\\] Start with: devenv up' \"$tmp_log\"", + "grep -q '^OTEL_MODE=' \"$tmp_log\"", + "grep -q '^OTEL_GRAFANA_LINK_URL=http' \"$tmp_log\"", + 'rm -f "$tmp_log"', + ].join('\n'), +} as const + /** * Temporary diagnostics summary for #272. * Remove once #201/#272 are root-caused and we can return to a minimal CI flow. 
@@ -112,7 +134,7 @@ const nixDiagnosticsSummaryStep = { ].join('\n'), } as const -const job = (step: { name: string; run: string }) => ({ +const job = (step: { name: string; run: string }, extraSteps: readonly any[] = []) => ({ 'runs-on': namespaceRunner({ profile: 'namespace-profile-linux-x86-64', runId: '${{ github.run_id }}', @@ -121,6 +143,7 @@ const job = (step: { name: string; run: string }) => ({ env: standardCIEnv, steps: [ ...baseSteps, + ...extraSteps, step, savePnpmStoreStep(), nixDiagnosticsSummaryStep, @@ -186,7 +209,7 @@ const jobs: Record | ReturnType Explore -> Tempo ``` @@ -93,7 +93,7 @@ otel-trace | cat # plain text: trace: The function parses `TRACEPARENT` (W3C format: `version-traceId-spanId-traceFlags`) and constructs a Grafana Explore URL from `OTEL_GRAFANA_LINK_URL`. -**Note:** Auto-display of the trace URL on shell entry is blocked by devenv's PTY task runner (`drain_pty_to_vt`), which consumes all shell output before the interactive session starts. Tracked upstream in [cachix/devenv#2500](https://github.com/cachix/devenv/issues/2500). +**Note:** This repo now uses `devenv.messages` to auto-display the OTEL shell-entry notice. `otel-trace` remains as an on-demand way to reopen the same link later in the session. The repo is temporarily pinned to the upstream post-[cachix/devenv#2661](https://github.com/cachix/devenv/pull/2661) commit while waiting for the next tagged release. ### `otel-span` -- Trace span CLI diff --git a/context/workarounds/devenv-issues.md b/context/workarounds/devenv-issues.md index cf9d4e259..ef1267b85 100644 --- a/context/workarounds/devenv-issues.md +++ b/context/workarounds/devenv-issues.md @@ -124,6 +124,10 @@ removed as they are no longer needed. **Issue:** https://github.com/cachix/devenv/issues/2500 +**Upstream status:** Fixed by https://github.com/cachix/devenv/pull/2661. + +**Repo status:** Temporarily resolved here by pinning `devenv` to the merged upstream commit while waiting for the next tagged release. 
+ **Affected repos:** Any repo wanting to display messages (e.g. trace URLs) on shell entry **Symptoms:** @@ -136,11 +140,11 @@ removed as they are no longer needed. devenv's PTY task runner sends two echo sentinels and reads until both are found, feeding all output to a headless VT. This intentionally hides task runner noise but also swallows any user-facing messages from `enterShell`. -**Workaround:** - -Provide an on-demand `otel-trace` shell function instead of auto-displaying. The function is defined during rcfile sourcing and stays available in the interactive shell. +**Current repo approach:** -**Upstream proposal:** A post-drain hook mechanism (env var, file-based, or `ShellCommand` variant) to run code after the interactive session starts. +- Emit OTEL shell-entry notices through `devenv.messages` task output. +- Reuse the exported Grafana link env in `otel-trace` for on-demand reopening. +- Keep a TODO to return to the `v2.0.7` tag once that release is available. --- @@ -243,10 +247,9 @@ Git hooks run in a subprocess that doesn't inherit the direnv environment. 
- Remove manual JSON trace post-processing from CI pipelines - Update R10 status in this document to reflect full compliance -- **DEVENV-05 fixed (post-drain hook via #2500):** - - Implement auto-display of otel trace URL using the new hook mechanism - - Remove "on-demand only" comment in `nix/devenv-modules/otel.nix` - - Update `context/otel.md` to reflect auto-display capability +- **DEVENV-05 follow-up (tagged release contains #2661):** + - Replace the temporary commit pin with the `v2.0.7` tag + - Remove the temporary pin note from `devenv.yaml` / CI docs - **COMPAT-01 improved (web coding agent support):** - When Claude Code Web adds Nix domains to allowlist: update status, remove "Full internet" workaround diff --git a/devenv.lock b/devenv.lock index 6ef29ecf8..7aa11e8c4 100644 --- a/devenv.lock +++ b/devenv.lock @@ -162,17 +162,17 @@ "rust-overlay": "rust-overlay" }, "locked": { - "lastModified": 1774168944, - "narHash": "sha256-i1G6n/7Z5fO9RhplzXQSTiLyh1Cs0GhoCoEStFLARtA=", + "lastModified": 1774649847, + "narHash": "sha256-2h7rrOzLjyQdt20yHKPnK0fA+v0fj+whGaDBnmfGahY=", "owner": "cachix", "repo": "devenv", - "rev": "55d2bb4a3cc710ba82cc8644f4419db3a802e1a4", + "rev": "61170924d98492ad8842dca02ad8b912305d308b", "type": "github" }, "original": { "owner": "cachix", - "ref": "v2.0.6", "repo": "devenv", + "rev": "61170924d98492ad8842dca02ad8b912305d308b", "type": "github" } }, diff --git a/devenv.nix b/devenv.nix index 1ba50ec30..266477055 100644 --- a/devenv.nix +++ b/devenv.nix @@ -319,8 +319,7 @@ in optionalTasks = [ "pnpm:install" "genie:run" - "mr:sync" - "ts:emit" + "mr:apply" ]; completionsCliNames = [ "genie" @@ -364,6 +363,7 @@ in tasks."genie:check".after = [ "pnpm:install" ]; tasks."lint:check:genie".after = [ "pnpm:install" ]; tasks."mr:sync".after = [ "pnpm:install" ]; + tasks."mr:apply".after = [ "pnpm:install" ]; tasks."gh:apply-settings" = { after = [ "genie:run" ]; diff --git a/devenv.yaml b/devenv.yaml index 162bc5b67..3c3d7c67b 100644 --- 
a/devenv.yaml +++ b/devenv.yaml @@ -1,6 +1,9 @@ inputs: devenv: - url: github:cachix/devenv/v2.0.6 + # Temporary pin to the post-#2661 commit so shell-entry task messages are + # available before the upstream v2.0.7 release lands. + # TODO: Switch back to github:cachix/devenv/v2.0.7 once it is released. + url: github:cachix/devenv/61170924d98492ad8842dca02ad8b912305d308b nixpkgs: url: github:NixOS/nixpkgs/nixos-unstable git-hooks: diff --git a/genie/ci-workflow.ts b/genie/ci-workflow.ts index a1c554ed7..7d5ae2efa 100644 --- a/genie/ci-workflow.ts +++ b/genie/ci-workflow.ts @@ -351,6 +351,10 @@ export const cachixStep = (opts: { name: string; authToken?: string }) => ({ /** * Prepare lock-pinned devenv metadata from devenv.lock. + * + * The lock may temporarily point at an upstream commit instead of a release tag + * while we validate a fix ahead of the next devenv release. + * TODO: Drop that temporary pin once v2.0.7 is available. */ export const preparePinnedDevenvStep = { name: 'Use pinned devenv from lock', @@ -554,9 +558,7 @@ nix run "github:overengineeringstudio/effect-utils/$EU_REV#megarepo" -- apply -- */ export const validateNixStoreStep = { name: 'Resolve devenv', - run: `if [ -z "${'${DEVENV_REV:-}'}" ]; then - ${resolveDevenvRevScript} -fi + run: `${resolveDevenvRevScript} ${resolveDevenvFnScript} diff --git a/nix/devenv-modules/otel.nix b/nix/devenv-modules/otel.nix index 55c28575a..91fa460f8 100644 --- a/nix/devenv-modules/otel.nix +++ b/nix/devenv-modules/otel.nix @@ -359,6 +359,156 @@ let # Whether to include local OTEL infrastructure (collector, tempo, grafana processes) needsLocalInfra = mode != "system"; + otelResolveShellState = '' + resolve_otel_shell_state() { + if [ "$OTEL_MODE" = "auto" ]; then + if [ -n "''${OTEL_STATE_DIR:-}" ]; then + OTEL_MODE="system" + else + OTEL_MODE="local" + fi + fi + + if [ "$OTEL_MODE" = "system" ]; then + if [ -z "''${OTEL_STATE_DIR:-}" ]; then + echo "[otel] ERROR: OTEL_MODE=system requires OTEL_STATE_DIR" >&2 
+ return 1 + fi + if [ -z "''${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ]; then + echo "[otel] ERROR: OTEL_MODE=system requires OTEL_EXPORTER_OTLP_ENDPOINT" >&2 + return 1 + fi + if [ -z "''${OTEL_GRAFANA_URL:-}" ]; then + echo "[otel] ERROR: OTEL_MODE=system requires OTEL_GRAFANA_URL" >&2 + return 1 + fi + if ! command -v otel >/dev/null 2>&1; then + echo "[otel] ERROR: OTEL_MODE=system requires otel CLI for dashboard sync" >&2 + return 1 + fi + if [ "${toString (builtins.length extraDashboards)}" -gt 0 ]; then + echo "[otel] ERROR: extraDashboards is not supported in OTEL_MODE=system" >&2 + return 1 + fi + if ! otel dash sync \ + --source "${allDashboards}" \ + --target "$OTEL_STATE_DIR/dashboards" >/dev/null 2>&1; then + echo "[otel] ERROR: otel dash sync failed" >&2 + return 1 + fi + _otel_mode_msg="[otel] Using system-level OTEL stack (mode=$OTEL_MODE)" + else + export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${toString otelCollectorPort}" + export OTEL_GRAFANA_URL="http://127.0.0.1:${toString grafanaPort}" + export OTEL_SPAN_SPOOL_DIR="${spoolDir}" + _otel_mode_msg="[otel] Using local devenv OTEL stack (mode=$OTEL_MODE)" + fi + + _otel_grafana="$OTEL_GRAFANA_URL" + if [ -n "''${TS_HOSTNAME:-}" ]; then + _otel_grafana="''${_otel_grafana//127.0.0.1/$TS_HOSTNAME}" + fi + if [ -n "''${TRACEPARENT:-}" ]; then + IFS='-' read -r _ _otel_trace_id _ _ <<< "$TRACEPARENT" + _panes='{"a":{"datasource":{"type":"tempo","uid":"tempo"},"queries":[{"refId":"A","datasource":{"type":"tempo","uid":"tempo"},"queryType":"traceql","query":"'"$_otel_trace_id"'"}],"range":{"from":"now-1h","to":"now"}}}' + _encoded=$(printf '%s' "$_panes" | sed 's/{/%7B/g;s/}/%7D/g;s/\[/%5B/g;s/\]/%5D/g;s/"/%22/g;s/:/%3A/g;s/,/%2C/g;s/ /%20/g') + _otel_grafana_link_url="$_otel_grafana/explore?schemaVersion=1&panes=$_encoded&orgId=1" + else + unset _otel_trace_id + _otel_grafana_link_url="$_otel_grafana" + fi + if [ -n "''${_otel_trace_id:-}" ]; then + _otel_trace_label="trace:$_otel_trace_id" + else + 
_otel_trace_label="grafana" + fi + _otel_grafana_display="$(printf '\e]8;;%s\x07\e[4m%s\e[24m\e]8;;\x07' "$_otel_grafana_link_url" "$_otel_trace_label")" + _otel_start_msg="[otel] Start with: devenv up | $_otel_grafana_display" + } + ''; + + otelDetectShellEntryState = '' + detect_otel_shell_entry_state() { + # Detect cold vs warm start (setup-git-hash written by setup.nix) + _cold_start="false" + if [ ! -f .direnv/task-cache/setup-git-hash ]; then + _cold_start="true" + elif [ "$(git rev-parse HEAD 2>/dev/null || echo no-git)" != "$(cat .direnv/task-cache/setup-git-hash 2>/dev/null || echo "")" ]; then + _cold_start="true" + fi + + # Detect what triggered this shell reload by comparing watched file mtimes. + # Uses devenv's input-paths.txt (nix inputs that affect the shell derivation), + # excluding .devenv/bootstrap/ files which are regenerated on every eval. + # Missing paths are tolerated here because input files can legitimately + # disappear between eval and shell startup while the user is editing. + _reload_trigger="unknown" + _otel_mtime_snapshot=".direnv/otel-watch-mtimes" + if [ -f ".devenv/input-paths.txt" ]; then + _otel_current=$( + while IFS= read -r _otel_path; do + [ -n "$_otel_path" ] || continue + [ -e "$_otel_path" ] || continue + ${pkgs.coreutils}/bin/stat -c '%Y %n' "$_otel_path" + done < <(${pkgs.gnugrep}/bin/grep -v '\.devenv/bootstrap/' .devenv/input-paths.txt) \ + | ${pkgs.coreutils}/bin/sort -k2 + ) + if [ ! 
-f "$_otel_mtime_snapshot" ]; then + _reload_trigger="initial" + elif [ "$_otel_current" = "$(${pkgs.coreutils}/bin/cat "$_otel_mtime_snapshot" 2>/dev/null)" ]; then + _reload_trigger="env-change" + else + _otel_changed=$( + (${pkgs.diffutils}/bin/diff <(${pkgs.coreutils}/bin/cat "$_otel_mtime_snapshot") <(echo "$_otel_current") 2>/dev/null || true) \ + | ${pkgs.gnugrep}/bin/grep '^[<>]' | ${pkgs.gawk}/bin/awk '{print $NF}' | ${pkgs.coreutils}/bin/sort -u \ + | ${pkgs.gnused}/bin/sed "s|^''${DEVENV_ROOT:-.}/||" \ + | ${pkgs.coreutils}/bin/head -5 | ${pkgs.coreutils}/bin/paste -sd ',' - + ) + _reload_trigger="''${_otel_changed:-unknown}" + fi + ${pkgs.coreutils}/bin/mkdir -p .direnv + echo "$_otel_current" > "$_otel_mtime_snapshot" + fi + } + ''; + + otelEmitShellEntry = '' + emit_otel_shell_entry_span() { + if [ -z "''${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ] \ + || [ -z "''${TRACEPARENT:-}" ] \ + || [ -z "''${OTEL_SHELL_ENTRY_NS:-}" ]; then + return 0 + fi + + IFS='-' read -r _ _otel_shell_trace_id _otel_shell_root_span_id _ <<< "$TRACEPARENT" + + # Shell-root tracing must use the store path directly instead of relying + # on PATH, because both shell hooks and early shell-entry tasks can run + # before package PATH mutations are fully visible. + _otel_span_bin="${otelSpan}/bin/otel-span" + [ -x "$_otel_span_bin" ] || return 0 + + # enterShell can run after traced setup tasks. If we let otel-span infer a + # parent from the ambient TRACEPARENT/OTEL_TASK_TRACEPARENT here, the + # shell root span can become self-parented or collide with later root + # spans. Emit it from explicit shell IDs instead. 
+ ( + unset TRACEPARENT OTEL_TASK_TRACEPARENT + "$_otel_span_bin" run "devenv" "shell:entry" \ + --trace-id "$_otel_shell_trace_id" \ + --span-id "$_otel_shell_root_span_id" \ + --start-time-ns "$OTEL_SHELL_ENTRY_NS" \ + --end-time-ns "$(${pkgs.coreutils}/bin/date +%s%N)" \ + --attr "cold_start=$_cold_start" \ + --attr "reload.trigger=$_reload_trigger" \ + -- true + ) || true + + export TRACEPARENT="00-$_otel_shell_trace_id-$_otel_shell_root_span_id-01" + unset OTEL_TASK_TRACEPARENT OTEL_SHELL_ENTRY_NS + } + ''; + in { packages = [ @@ -372,154 +522,41 @@ in env.OTEL_MODE = mode; - # mkAfter ensures this runs after other enterShell code, so env vars - # (including TRACEPARENT from setup:gate) are available. - # Note: devenv's PTY task runner drains all PROMPT_COMMAND output before the - # interactive session, so we provide `otel-trace` for on-demand trace URL access. + # OTEL shell state is resolved in a task so the same source of truth can + # export env vars and emit the post-init shell message via devenv.messages. + # The shell root span is emitted in a dedicated task after setup work, so + # enterShell only consumes exported state and marks the interactive handoff. enterShell = lib.mkAfter '' - # ── Mode detection ────────────────────────────────────────────────── - # Resolve "auto" to "system" or "local" at runtime. - # Contract: a system-level OTEL stack (e.g. home-manager otel-stack module) - # advertises itself by setting OTEL_STATE_DIR as a session variable. - if [ "$OTEL_MODE" = "auto" ]; then - if [ -n "''${OTEL_STATE_DIR:-}" ]; then - OTEL_MODE="system" - else - OTEL_MODE="local" - fi - fi - - if [ "$OTEL_MODE" = "system" ]; then - if [ -z "''${OTEL_STATE_DIR:-}" ]; then - echo "[otel] ERROR: OTEL_MODE=system requires OTEL_STATE_DIR" >&2 - return 1 2>/dev/null || exit 1 - fi - if [ -z "''${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ]; then - echo "[otel] ERROR: OTEL_MODE=system requires OTEL_EXPORTER_OTLP_ENDPOINT" >&2 - return 1 2>/dev/null || exit 1 - fi - if ! 
command -v otel >/dev/null 2>&1; then - echo "[otel] ERROR: OTEL_MODE=system requires otel CLI for dashboard sync" >&2 - return 1 2>/dev/null || exit 1 - fi - if [ "${toString (builtins.length extraDashboards)}" -gt 0 ]; then - echo "[otel] ERROR: extraDashboards is not supported in OTEL_MODE=system" >&2 - return 1 2>/dev/null || exit 1 - fi - if ! otel dash sync \ - --source "${allDashboards}" \ - --target "$OTEL_STATE_DIR/dashboards" >/dev/null 2>&1; then - echo "[otel] ERROR: otel dash sync failed" >&2 - return 1 2>/dev/null || exit 1 - fi - _otel_entry_msg="[otel] Using system-level OTEL stack (mode=$OTEL_MODE)" - else - # Local devenv stack — set env vars with local hash-derived ports - export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${toString otelCollectorPort}" - export OTEL_GRAFANA_URL="http://127.0.0.1:${toString grafanaPort}" - export OTEL_SPAN_SPOOL_DIR="${spoolDir}" - _otel_entry_msg="[otel] Using local devenv OTEL stack (mode=$OTEL_MODE)" - fi - - _otel_grafana="$OTEL_GRAFANA_URL" - if [ -n "''${TS_HOSTNAME:-}" ]; then - _otel_grafana="''${_otel_grafana//127.0.0.1/$TS_HOSTNAME}" - fi - # Build Grafana link: trace-specific when TRACEPARENT is available, dashboard otherwise - if [ -n "''${TRACEPARENT:-}" ]; then - IFS='-' read -r _ _otel_trace_id _ _ <<< "$TRACEPARENT" - _panes='{"a":{"datasource":{"type":"tempo","uid":"tempo"},"queries":[{"refId":"A","datasource":{"type":"tempo","uid":"tempo"},"queryType":"traceql","query":"'"$_otel_trace_id"'"}],"range":{"from":"now-1h","to":"now"}}}' - _encoded=$(printf '%s' "$_panes" | sed 's/{/%7B/g;s/}/%7D/g;s/\[/%5B/g;s/\]/%5D/g;s/"/%22/g;s/:/%3A/g;s/,/%2C/g;s/ /%20/g') - _grafana_link_url="$_otel_grafana/explore?schemaVersion=1&panes=$_encoded&orgId=1" - else - _grafana_link_url="$_otel_grafana" - fi - if [ -n "''${_otel_trace_id:-}" ]; then - _trace_label="trace:$_otel_trace_id" - else - _trace_label="grafana" - fi - if [ -t 2 ]; then - _grafana_display="$(printf '\e]8;;%s\x07\e[4m%s\e[24m\e]8;;\x07' 
"$_grafana_link_url" "$_trace_label")" - else - _grafana_display="$_trace_label $_grafana_link_url" - fi - _otel_entry_msg="$_otel_entry_msg -[otel] Start with: devenv up | $_grafana_display" - - # devenv's PTY task runner drains all PROMPT_COMMAND output before the - # interactive session starts, so we can't display messages via echo. - # Instead, provide an `otel-trace` shell function for on-demand access. - # No `export -f` needed — function is defined during rcfile sourcing - # and stays available in the interactive shell. - export OTEL_GRAFANA_LINK_URL="$_grafana_link_url" + # `otel-trace` remains as a cheap on-demand way to reopen the current link, + # but the user-visible shell-entry message now comes from `otel:shell-env`. otel_trace() { + local _url="''${OTEL_GRAFANA_LINK_URL:-''${OTEL_GRAFANA_URL:-}}" + if [ -z "$_url" ]; then + echo "[otel] No OTEL grafana link available" + return 1 + fi if [ -n "''${TRACEPARENT:-}" ]; then IFS='-' read -r _ _tid _ _ <<< "$TRACEPARENT" - local _url="''${OTEL_GRAFANA_LINK_URL:-$OTEL_GRAFANA_URL}" + local _label="trace:$_tid" if [ -t 1 ]; then - printf '\e]8;;%s\x07\e[4m%s\e[24m\e]8;;\x07\n' "$_url" "trace:$_tid" + printf '\e]8;;%s\x07\e[4m%s\e[24m\e]8;;\x07\n' "$_url" "$_label" else - echo "trace:$_tid $_url" + echo "$_label $_url" fi else - echo "[otel] No TRACEPARENT available" + if [ -t 1 ]; then + printf '\e]8;;%s\x07\e[4m%s\e[24m\e]8;;\x07\n' "$_url" "grafana" + else + echo "grafana $_url" + fi fi } alias otel-trace=otel_trace - # Detect cold vs warm start (setup-git-hash written by setup.nix) - _cold_start="false" - if [ ! -f .direnv/task-cache/setup-git-hash ]; then - _cold_start="true" - elif [ "$(git rev-parse HEAD 2>/dev/null || echo no-git)" != "$(cat .direnv/task-cache/setup-git-hash 2>/dev/null || echo "")" ]; then - _cold_start="true" - fi - - # Detect what triggered this shell reload by comparing watched file mtimes. 
- # Uses devenv's input-paths.txt (nix inputs that affect the shell derivation), - # excluding .devenv/bootstrap/ files which are regenerated on every eval. - # xargs stat is ~2ms for ~50 files — negligible overhead. - _reload_trigger="unknown" - _otel_mtime_snapshot=".direnv/otel-watch-mtimes" - if [ -f ".devenv/input-paths.txt" ]; then - _otel_current=$(grep -v '\.devenv/bootstrap/' .devenv/input-paths.txt \ - | xargs stat -c '%Y %n' 2>/dev/null | sort -k2) - if [ ! -f "$_otel_mtime_snapshot" ]; then - _reload_trigger="initial" - elif [ "$_otel_current" = "$(cat "$_otel_mtime_snapshot" 2>/dev/null)" ]; then - _reload_trigger="env-change" - else - _otel_changed=$(diff <(cat "$_otel_mtime_snapshot") <(echo "$_otel_current") 2>/dev/null \ - | grep '^[<>]' | awk '{print $NF}' | sort -u \ - | sed "s|^''${DEVENV_ROOT:-.}/||" \ - | head -5 | paste -sd ',' -) - _reload_trigger="''${_otel_changed:-unknown}" - fi - mkdir -p .direnv - echo "$_otel_current" > "$_otel_mtime_snapshot" - fi - - # Emit root shell:entry span covering the full setup duration. - # TRACEPARENT and OTEL_SHELL_ENTRY_NS are propagated from setup:gate via - # devenv's native task output -> env mechanism (devenv.env convention). - if command -v otel-span >/dev/null 2>&1 \ - && [ -n "''${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ] \ - && [ -n "''${TRACEPARENT:-}" ] \ - && [ -n "''${OTEL_SHELL_ENTRY_NS:-}" ]; then - IFS='-' read -r _ _trace_id _span_id _ <<< "$TRACEPARENT" - ( - unset TRACEPARENT - otel-span run "devenv" "shell:entry" \ - --trace-id "$_trace_id" \ - --span-id "$_span_id" \ - --start-time-ns "$OTEL_SHELL_ENTRY_NS" \ - --end-time-ns "$(date +%s%N)" \ - --attr "cold_start=$_cold_start" \ - --attr "reload.trigger=$_reload_trigger" \ - -- true - ) || true - fi + # setup:gate seeds shell-root trace IDs for setup tasks. Clear the + # task-scoped context markers before handing control to the interactive + # shell so later `dt` roots do not accidentally reuse shell bootstrap state. 
+ unset OTEL_TASK_TRACEPARENT OTEL_SHELL_ENTRY_NS # Mark the moment the shell becomes interactive (after all setup + OTEL work). # Consumed by dt.nix for the shell.ready_ms span attribute. @@ -589,6 +626,68 @@ in # Tasks # ========================================================================= + tasks."otel:shell-env" = { + description = "Resolve OTEL shell env and shell-entry message"; + exports = [ + "OTEL_MODE" + "OTEL_EXPORTER_OTLP_ENDPOINT" + "OTEL_GRAFANA_URL" + "OTEL_SPAN_SPOOL_DIR" + "OTEL_GRAFANA_LINK_URL" + ]; + exec = '' + set -euo pipefail + ${otelResolveShellState} + resolve_otel_shell_state + + ${pkgs.jq}/bin/jq -n \ + --arg mode "$OTEL_MODE" \ + --arg endpoint "''${OTEL_EXPORTER_OTLP_ENDPOINT:-}" \ + --arg grafanaUrl "''${OTEL_GRAFANA_URL:-}" \ + --arg spoolDir "''${OTEL_SPAN_SPOOL_DIR:-}" \ + --arg linkUrl "$_otel_grafana_link_url" \ + --arg modeMessage "$_otel_mode_msg" \ + --arg startMessage "$_otel_start_msg" \ + '{ + devenv: { + env: ( + { + OTEL_MODE: $mode, + OTEL_GRAFANA_LINK_URL: $linkUrl + } + + (if $endpoint != "" then { OTEL_EXPORTER_OTLP_ENDPOINT: $endpoint } else {} end) + + (if $grafanaUrl != "" then { OTEL_GRAFANA_URL: $grafanaUrl } else {} end) + + (if $spoolDir != "" then { OTEL_SPAN_SPOOL_DIR: $spoolDir } else {} end) + ), + messages: [$modeMessage, $startMessage] + } + }' > "$DEVENV_TASK_OUTPUT_FILE" + ''; + before = [ "devenv:enterShell" ]; + after = lib.optionals (builtins.hasAttr "setup:gate" config.tasks) [ "setup:gate" ]; + }; + + tasks."otel:shell-entry" = { + description = "Emit the shell-entry root trace span after setup completes"; + exec = '' + set -euo pipefail + ${otelDetectShellEntryState} + ${otelEmitShellEntry} + detect_otel_shell_entry_state || true + emit_otel_shell_entry_span + ''; + before = [ "devenv:enterShell" ]; + after = + lib.optionals (builtins.hasAttr "devenv:files:cleanup" config.tasks) [ "devenv:files:cleanup" ] + ++ lib.optionals (builtins.hasAttr "devenv:files" config.tasks) [ "devenv:files" ] + ++ 
[ "otel:shell-env" ] + ++ lib.optionals (builtins.hasAttr "setup:record-cache" config.tasks) [ "setup:record-cache@completed" ] + ++ lib.optionals ( + !(builtins.hasAttr "setup:record-cache" config.tasks) + && builtins.hasAttr "setup:gate" config.tasks + ) [ "setup:gate" ]; + }; + tasks."otel:test" = { description = "Run otel-span shell-level unit tests (offline, no devenv up needed)"; exec = '' @@ -607,6 +706,10 @@ in # so always provide a local default for the offline unit tests. export OTEL_EXPORTER_OTLP_ENDPOINT="''${OTEL_EXPORTER_OTLP_ENDPOINT:-http://127.0.0.1:4318}" + ${otelResolveShellState} + ${otelDetectShellEntryState} + ${otelEmitShellEntry} + _check() { local name="$1" shift @@ -647,7 +750,142 @@ in } _check "Attribute type handling" _test_attr_types - # Test 2: TRACEPARENT propagation + # Test 3: local shell state resolution exports the local stack and a trace link + _test_shell_state_local() { + ( + export OTEL_MODE="local" + export TRACEPARENT="00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01" + export TS_HOSTNAME="ts.example.test" + unset OTEL_GRAFANA_URL OTEL_EXPORTER_OTLP_ENDPOINT OTEL_SPAN_SPOOL_DIR + resolve_otel_shell_state + [ "$OTEL_EXPORTER_OTLP_ENDPOINT" = "http://127.0.0.1:${toString otelCollectorPort}" ] || return 1 + [ "$OTEL_GRAFANA_URL" = "http://127.0.0.1:${toString grafanaPort}" ] || return 1 + [ "$OTEL_SPAN_SPOOL_DIR" = "${spoolDir}" ] || return 1 + echo "$_otel_grafana_link_url" | grep -q 'ts.example.test' || return 1 + echo "$_otel_start_msg" | grep -q 'trace:' || return 1 + ) + } + _check "Shell state resolution (local)" _test_shell_state_local + + # Test 4: system shell state requires an explicit Grafana URL + _test_shell_state_system_requires_grafana() { + ( + export OTEL_MODE="system" + export OTEL_STATE_DIR="$_tmp/system-state" + export OTEL_EXPORTER_OTLP_ENDPOINT="http://collector.example:4318" + unset OTEL_GRAFANA_URL OTEL_SPAN_SPOOL_DIR + otel() { return 0; } + ! 
resolve_otel_shell_state >/dev/null 2>&1 + ) + } + _check "Shell state resolution (system requires Grafana URL)" _test_shell_state_system_requires_grafana + + # Test 5: shell:entry emission uses explicit shell IDs and ignores ambient parents + _test_shell_entry_root_span() { + local spool="$_tmp/shell-entry-root" + mkdir -p "$spool" + ( + export OTEL_SPAN_SPOOL_DIR="$spool" + export TRACEPARENT="00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01" + export OTEL_SHELL_ENTRY_NS="1234567890000000000" + export OTEL_TASK_TRACEPARENT="00-feedfacefeedfacefeedfacefeedface-2222222222222222-01" + _cold_start="false" + _reload_trigger="initial" + + emit_otel_shell_entry_span + + [ "$TRACEPARENT" = "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01" ] || return 1 + [ -z "''${OTEL_TASK_TRACEPARENT:-}" ] || return 1 + [ -z "''${OTEL_SHELL_ENTRY_NS:-}" ] || return 1 + ) + + [ -f "$spool/spans.jsonl" ] || return 1 + + local line actual_trace actual_span has_parent + line=$(head -1 "$spool/spans.jsonl") + actual_trace=$(echo "$line" | ${pkgs.jq}/bin/jq -r '.resourceSpans[0].scopeSpans[0].spans[0].traceId') + actual_span=$(echo "$line" | ${pkgs.jq}/bin/jq -r '.resourceSpans[0].scopeSpans[0].spans[0].spanId') + has_parent=$(echo "$line" | ${pkgs.jq}/bin/jq -r '.resourceSpans[0].scopeSpans[0].spans[0] | has("parentSpanId")') + + [ "$actual_trace" = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" ] \ + && [ "$actual_span" = "bbbbbbbbbbbbbbbb" ] \ + && [ "$has_parent" = "false" ] + } + _check "shell:entry root span emission" _test_shell_entry_root_span + + # Test 6: shell:entry emission must not depend on PATH already containing + # otel-span because enterShell can run before package PATH setup settles. 
+ _test_shell_entry_root_span_without_path() { + local spool="$_tmp/shell-entry-no-path" + mkdir -p "$spool" + ( + export OTEL_SPAN_SPOOL_DIR="$spool" + export OTEL_EXPORTER_OTLP_ENDPOINT="http://collector.example:4318" + export TRACEPARENT="00-cccccccccccccccccccccccccccccccc-dddddddddddddddd-01" + export OTEL_SHELL_ENTRY_NS="1234567890000000001" + export PATH="/nonexistent" + _cold_start="false" + _reload_trigger="env-change" + + emit_otel_shell_entry_span + + [ "$TRACEPARENT" = "00-cccccccccccccccccccccccccccccccc-dddddddddddddddd-01" ] || return 1 + [ -z "''${OTEL_SHELL_ENTRY_NS:-}" ] || return 1 + ) + + [ -f "$spool/spans.jsonl" ] || return 1 + + local line actual_trace actual_span has_parent + line=$(head -1 "$spool/spans.jsonl") + actual_trace=$(echo "$line" | ${pkgs.jq}/bin/jq -r '.resourceSpans[0].scopeSpans[0].spans[0].traceId') + actual_span=$(echo "$line" | ${pkgs.jq}/bin/jq -r '.resourceSpans[0].scopeSpans[0].spans[0].spanId') + has_parent=$(echo "$line" | ${pkgs.jq}/bin/jq -r '.resourceSpans[0].scopeSpans[0].spans[0] | has("parentSpanId")') + + [ "$actual_trace" = "cccccccccccccccccccccccccccccccc" ] \ + && [ "$actual_span" = "dddddddddddddddd" ] \ + && [ "$has_parent" = "false" ] + } + _check "shell:entry root span emission without PATH" _test_shell_entry_root_span_without_path + + # Test 7: reload-trigger detection uses pinned binaries instead of + # ambient PATH, so the shell-entry task works before GNU tools are added. 
+ _test_shell_entry_state_without_path() { + local workdir="$_tmp/shell-entry-state-no-path" + mkdir -p "$workdir/.devenv" "$workdir/.direnv" + echo "$workdir/foo.nix" > "$workdir/.devenv/input-paths.txt" + echo "x = 1;" > "$workdir/foo.nix" + + ( + cd "$workdir" + export PATH="/nonexistent" + detect_otel_shell_entry_state + [ "$_cold_start" = "true" ] || return 1 + [ "$_reload_trigger" = "initial" ] || return 1 + [ -f ".direnv/otel-watch-mtimes" ] || return 1 + ) + } + _check "shell-entry state detection without PATH" _test_shell_entry_state_without_path + + # Test 8: reload-trigger detection tolerates input paths that disappear + # between eval and shell startup instead of failing the shell-entry task. + _test_shell_entry_state_missing_paths() { + local workdir="$_tmp/shell-entry-state-missing-paths" + mkdir -p "$workdir/.devenv" "$workdir/.direnv" + echo "$workdir/foo.nix" > "$workdir/.devenv/input-paths.txt" + echo "$workdir/missing.nix" >> "$workdir/.devenv/input-paths.txt" + echo "x = 1;" > "$workdir/foo.nix" + + ( + cd "$workdir" + export PATH="/nonexistent" + detect_otel_shell_entry_state + [ "$_reload_trigger" = "initial" ] || return 1 + [ -f ".direnv/otel-watch-mtimes" ] || return 1 + ) + } + _check "shell-entry state detection with missing paths" _test_shell_entry_state_missing_paths + + # Test 9: TRACEPARENT propagation _test_traceparent() { local spool="$_tmp/tp-test" mkdir -p "$spool" @@ -664,14 +902,14 @@ in } _check "TRACEPARENT propagation" _test_traceparent - # Test 3: Spool fallback (nonexistent dir) + # Test 10: Spool fallback (nonexistent dir) _test_spool_fallback() { # With nonexistent spool dir, should still succeed (falls back to curl which may fail silently) OTEL_SPAN_SPOOL_DIR="/nonexistent" OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:1" otel-span run "test" "fallback" -- true >/dev/null 2>&1 } _check "Spool fallback" _test_spool_fallback - # Test 4: Spool file write + # Test 11: Spool file write _test_spool_write() { local 
spool="$_tmp/write-test" mkdir -p "$spool" @@ -683,7 +921,7 @@ in } _check "Spool write" _test_spool_write - # Test 5: --span-id override + # Test 12: --span-id override _test_span_id_override() { local spool="$_tmp/spanid-test" mkdir -p "$spool" @@ -695,7 +933,7 @@ in } _check "--span-id override" _test_span_id_override - # Test 6: --start-time-ns override + # Test 13: --start-time-ns override _test_start_time_override() { local spool="$_tmp/startns-test" mkdir -p "$spool" @@ -707,7 +945,7 @@ in } _check "--start-time-ns override" _test_start_time_override - # Test 7: --end-time-ns override + # Test 14: --end-time-ns override _test_end_time_override() { local spool="$_tmp/endns-test" mkdir -p "$spool" @@ -719,7 +957,7 @@ in } _check "--end-time-ns override" _test_end_time_override - # Test 8: --log-url outputs Grafana trace URL to stderr + # Test 15: --log-url outputs Grafana trace URL to stderr _test_log_url() { local spool="$_tmp/logurl-test" mkdir -p "$spool" @@ -736,7 +974,7 @@ in } _check "--log-url output" _test_log_url - # Test 9: No trace context produces root span (no parentSpanId) + # Test 16: No trace context produces root span (no parentSpanId) _test_no_traceparent_root() { local spool="$_tmp/root-test" mkdir -p "$spool" @@ -752,7 +990,7 @@ in } _check "No trace context = root span" _test_no_traceparent_root - # Test 10: OTEL_TASK_TRACEPARENT takes precedence over TRACEPARENT + # Test 17: OTEL_TASK_TRACEPARENT takes precedence over TRACEPARENT _test_task_traceparent_precedence() { local spool="$_tmp/task-tp-test" mkdir -p "$spool" @@ -773,7 +1011,7 @@ in } _check "OTEL_TASK_TRACEPARENT precedence" _test_task_traceparent_precedence - # Test 11: --status-attr derives bool from exit code (cached case, exit 0) + # Test 18: --status-attr derives bool from exit code (cached case, exit 0) _test_status_attr_cached() { local spool="$_tmp/status-cached" mkdir -p "$spool" @@ -793,7 +1031,7 @@ in } _check "--status-attr cached (exit 0)" _test_status_attr_cached - # 
Test 12: --status-attr derives bool from exit code (uncached case, exit 1) + # Test 19: --status-attr derives bool from exit code (uncached case, exit 1) _test_status_attr_uncached() { local spool="$_tmp/status-uncached" mkdir -p "$spool" @@ -813,7 +1051,7 @@ in } _check "--status-attr uncached (exit 1)" _test_status_attr_uncached - # Test 13: --status-attr propagates TRACEPARENT to child (sub-traces) + # Test 20: --status-attr propagates TRACEPARENT to child (sub-traces) _test_status_attr_subtrace() { local spool="$_tmp/status-subtrace" mkdir -p "$spool" @@ -831,7 +1069,7 @@ in } _check "--status-attr sub-trace propagation" _test_status_attr_subtrace - # Test 14: otel-span exports OTEL_TASK_TRACEPARENT to child processes + # Test 21: otel-span exports OTEL_TASK_TRACEPARENT to child processes _test_task_traceparent_export() { local spool="$_tmp/task-tp-export" mkdir -p "$spool" diff --git a/nix/devenv-modules/otel/dashboards/shell-entry.jsonnet b/nix/devenv-modules/otel/dashboards/shell-entry.jsonnet index 875dbbf61..392fa4adb 100644 --- a/nix/devenv-modules/otel/dashboards/shell-entry.jsonnet +++ b/nix/devenv-modules/otel/dashboards/shell-entry.jsonnet @@ -1,7 +1,7 @@ // Shell Entry (enterShell) dashboard // How long do shell entry tasks take, with breakdown by task. // -// Shell entry runs optional tasks: pnpm:install, genie:run, mr:sync, ts:emit +// Shell entry runs optional tasks: pnpm:install, genie:run, mr:apply // These tasks are only executed when their dependencies change (git hash caching). // Use FORCE_SETUP=1 to force re-run even when cached. 
local g = import 'g.libsonnet'; @@ -30,7 +30,7 @@ local traceTable(title, query, limit=50) = g.dashboard.new('Shell Entry Performance') + g.dashboard.withUid('otel-shell-entry') -+ g.dashboard.withDescription('Performance breakdown of devenv shell entry tasks (pnpm:install, genie:run, mr:sync, ts:emit)') ++ g.dashboard.withDescription('Performance breakdown of devenv shell entry tasks (pnpm:install, genie:run, mr:apply)') + g.dashboard.graphTooltip.withSharedCrosshair() + g.dashboard.withTimezone('browser') + g.dashboard.withPanels( @@ -39,8 +39,8 @@ g.dashboard.new('Shell Entry Performance') g.panel.row.new('Shell Entry Tasks'), traceTable( - 'All shell entry tasks (pnpm:install, genie:run, mr:sync, ts:emit)', - '{resource.service.name="dt-task" && name=~"pnpm:install|genie:run|mr:sync|ts:emit"}', + 'All shell entry tasks (pnpm:install, genie:run, mr:apply)', + '{resource.service.name="dt-task" && name=~"pnpm:install|genie:run|mr:apply"}', 50, ), @@ -60,14 +60,8 @@ g.dashboard.new('Shell Entry Performance') ), traceTable( - 'mr:sync', - '{resource.service.name="dt-task" && name="mr:sync"}', - 30, - ), - - traceTable( - 'ts:emit', - '{resource.service.name="dt-task" && name="ts:emit"}', + 'mr:apply', + '{resource.service.name="dt-task" && name="mr:apply"}', 30, ), ], panelWidth=24, panelHeight=10) diff --git a/nix/devenv-modules/tasks/shared/check-node-modules-projection-health.cjs b/nix/devenv-modules/tasks/shared/check-node-modules-projection-health.cjs index d7f728452..d4c004c79 100644 --- a/nix/devenv-modules/tasks/shared/check-node-modules-projection-health.cjs +++ b/nix/devenv-modules/tasks/shared/check-node-modules-projection-health.cjs @@ -1,12 +1,14 @@ const fs = require('fs') const path = require('path') const { createRequire } = require('module') +const crypto = require('crypto') + +const mode = process.env.NODE_MODULES_HELPER_MODE || 'health' /** - * GVS can leave package symlinks present while still dropping transitive - * projections after config/path 
changes. Checking only for broken symlinks - * misses that failure mode, so this helper resolves each symlinked package's - * declared runtime deps from the package's real path. + * Keep the node_modules helper logic in one process so the warm status path + * can preserve its exact structural fingerprint semantics without paying for + * hundreds of shell-level `readlink` subprocesses. */ const moduleDirs = (process.env.NODE_MODULES_DIRS || '') .split('\n') @@ -15,9 +17,23 @@ const moduleDirs = (process.env.NODE_MODULES_DIRS || '') .filter((value, index, values) => values.indexOf(value) === index) .filter((value) => fs.existsSync(value)) -const dependencyProjectionFailures = [] +const collectProjectionEntryPaths = (nodeModulesDir) => { + const result = [] + for (const entry of fs.readdirSync(nodeModulesDir, { withFileTypes: true })) { + const entryPath = path.join(nodeModulesDir, entry.name) + if (entry.isDirectory()) { + for (const childEntry of fs.readdirSync(entryPath, { withFileTypes: true })) { + result.push(path.join(entryPath, childEntry.name)) + } + continue + } -const collectEntryPaths = (nodeModulesDir) => { + result.push(entryPath) + } + return result.sort() +} + +const collectHealthEntryPaths = (nodeModulesDir) => { const result = [] for (const entry of fs.readdirSync(nodeModulesDir, { withFileTypes: true })) { if (entry.name === '.bin' || entry.name === '.pnpm') continue @@ -55,50 +71,115 @@ const resolveDependencyPackageRoot = ({ requireFromPkg, dependencyName }) => { return undefined } -for (const nodeModulesDir of moduleDirs) { - for (const entryPath of collectEntryPaths(nodeModulesDir)) { - let stat - try { - stat = fs.lstatSync(entryPath) - } catch { +const runProjectionHash = () => { + const hash = crypto.createHash('sha256') + const appendLine = (line) => { + hash.update(line) + hash.update('\n') + } + + for (const nodeModulesDir of moduleDirs) { + if (fs.existsSync(nodeModulesDir) && fs.statSync(nodeModulesDir).isDirectory()) { + appendLine(`dir 
${nodeModulesDir}`) + } else { + appendLine(`missing ${nodeModulesDir}`) continue } - if (!stat.isSymbolicLink()) continue + for (const entryPath of collectProjectionEntryPaths(nodeModulesDir)) { + let stat + try { + stat = fs.lstatSync(entryPath) + } catch { + continue + } - let realPath - try { - realPath = fs.realpathSync(entryPath) - } catch { - continue - } + if (!stat.isSymbolicLink()) continue + + let target = '' + try { + target = fs.readlinkSync(entryPath) + } catch {} - const packageJsonPath = path.join(realPath, 'package.json') - if (!fs.existsSync(packageJsonPath)) continue - - const pkg = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8')) - const dependencyNames = Object.keys(pkg.dependencies ?? {}) - if (dependencyNames.length === 0) continue - - const requireFromPkg = createRequire(packageJsonPath) - for (const dependencyName of dependencyNames) { - if ( - resolveDependencyPackageRoot({ - requireFromPkg, - dependencyName, - }) === undefined - ) { - dependencyProjectionFailures.push( - `${pkg.name ?? 
entryPath} -> ${dependencyName} (from ${nodeModulesDir})`, - ) + if (fs.existsSync(entryPath)) { + appendLine(`link ${entryPath} -> ${target}`) + } else { + appendLine(`broken-link ${entryPath} -> ${target}`) } } } + + const rootModulesYamlPath = process.env.PNPM_ROOT_MODULES_YAML || 'node_modules/.modules.yaml' + if (fs.existsSync(rootModulesYamlPath)) { + appendLine( + `modules-yaml ${crypto + .createHash('sha256') + .update(fs.readFileSync(rootModulesYamlPath)) + .digest('hex')}`, + ) + } else { + appendLine('modules-yaml missing') + } + + process.stdout.write(`${hash.digest('hex')}\n`) } -if (dependencyProjectionFailures.length > 0) { - for (const failure of dependencyProjectionFailures) { - console.error(`[pnpm] Missing dependency projection: ${failure}`) +const runHealthCheck = () => { + const dependencyProjectionFailures = [] + for (const nodeModulesDir of moduleDirs) { + for (const entryPath of collectHealthEntryPaths(nodeModulesDir)) { + let stat + try { + stat = fs.lstatSync(entryPath) + } catch { + continue + } + + if (!stat.isSymbolicLink()) continue + + let realPath + try { + realPath = fs.realpathSync(entryPath) + } catch { + continue + } + + const packageJsonPath = path.join(realPath, 'package.json') + if (!fs.existsSync(packageJsonPath)) continue + + const pkg = JSON.parse(fs.readFileSync(packageJsonPath, 'utf8')) + const dependencyNames = Object.keys(pkg.dependencies ?? {}) + if (dependencyNames.length === 0) continue + + const requireFromPkg = createRequire(packageJsonPath) + for (const dependencyName of dependencyNames) { + if ( + resolveDependencyPackageRoot({ + requireFromPkg, + dependencyName, + }) === undefined + ) { + dependencyProjectionFailures.push( + `${pkg.name ?? 
entryPath} -> ${dependencyName} (from ${nodeModulesDir})`, + ) + } + } + } + } + + if (dependencyProjectionFailures.length > 0) { + for (const failure of dependencyProjectionFailures) { + console.error(`[pnpm] Missing dependency projection: ${failure}`) + } + process.exit(1) } +} + +if (mode === 'projection-hash') { + runProjectionHash() +} else if (mode === 'health') { + runHealthCheck() +} else { + console.error(`[pnpm] Unknown node_modules helper mode: ${mode}`) process.exit(1) } diff --git a/nix/devenv-modules/tasks/shared/genie.nix b/nix/devenv-modules/tasks/shared/genie.nix index d8a58ef3a..1e919068f 100644 --- a/nix/devenv-modules/tasks/shared/genie.nix +++ b/nix/devenv-modules/tasks/shared/genie.nix @@ -15,17 +15,98 @@ let trace = import ../lib/trace.nix { inherit lib; }; cliGuard = import ../lib/cli-guard.nix { inherit pkgs; }; + cacheRoot = ".direnv/task-cache/genie-run"; + stateFile = "${cacheRoot}/state.hash"; + generatedFilesFile = "${cacheRoot}/generated-files.txt"; + collectGenieGeneratedFiles = '' + collect_genie_generated_files() { + # Genie owns these markers, so the warm-path fingerprint follows the same + # explicit generated-file contract as the generator itself. + ${pkgs.ripgrep}/bin/rg -l \ + --glob '!tmp/**' \ + --glob '!.git/**' \ + --glob '!.direnv/**' \ + --glob '!.devenv/**' \ + --glob '!node_modules/**' \ + '^// Source: .*\.genie\.ts|^# Source: .*\.genie\.ts' . || true + } + ''; + computeGenieStateHash = '' + compute_genie_state_hash() { + { + if command -v genie >/dev/null 2>&1; then + printf 'genie-path %s\n' "$(command -v genie)" + printf 'genie-version %s\n' "$(genie --version 2>/dev/null | ${pkgs.coreutils}/bin/head -n1 || echo unknown)" + fi + + # Track both the `.genie.ts` sources and the generated files they own so + # warm status checks catch manual drift without booting the full CLI. + ${pkgs.findutils}/bin/find . 
\ + -type f \ + -name '*.genie.ts' \ + -not -path './.git/*' \ + -not -path './.direnv/*' \ + -not -path './.devenv/*' \ + -not -path './node_modules/*' \ + -print + ${collectGenieGeneratedFiles} + } \ + | LC_ALL=C sort -u \ + | while IFS= read -r file; do + [ -f "$file" ] || continue + printf '%s\n' "$file" + ${pkgs.coreutils}/bin/sha256sum "$file" | awk '{print $1}' + done \ + | ${pkgs.coreutils}/bin/sha256sum \ + | awk '{print $1}' + } + ''; tasks = { "genie:run" = { guard = "genie"; description = "Generate config files from .genie.ts sources"; - exec = trace.exec "genie:run" "genie"; + exec = trace.exec "genie:run" '' + set -euo pipefail + mkdir -p ${lib.escapeShellArg cacheRoot} + ${collectGenieGeneratedFiles} + ${computeGenieStateHash} + genie + cache_value="$(compute_genie_state_hash)" + tmp_file="$(mktemp)" + printf "%s" "$cache_value" > "$tmp_file" + if [ -f ${lib.escapeShellArg stateFile} ] && cmp -s "$tmp_file" ${lib.escapeShellArg stateFile}; then + rm "$tmp_file" + else + mv "$tmp_file" ${lib.escapeShellArg stateFile} + fi + + generated_tmp_file="$(mktemp)" + collect_genie_generated_files | LC_ALL=C sort -u > "$generated_tmp_file" + mv "$generated_tmp_file" ${lib.escapeShellArg generatedFilesFile} + ''; status = trace.status "genie:run" "binary" '' set -euo pipefail - # Skip when generated files are already up to date. - # Silence output to keep shell entry clean. - genie --check >/dev/null 2>&1 + if [ "''${DEVENV_SETUP_OUTER_CACHE_HIT:-0}" = "1" ]; then + # The outer setup fingerprint already covers tracked generated-file + # drift plus genie binary identity. On that warm path, only prove that + # the outputs we generated last time still exist. Content drift is + # intentionally deferred to the next full fingerprint recomputation so + # shell entry does not have to boot the generator or re-hash every + # generated file on every hit. 
+ [ -f ${lib.escapeShellArg stateFile} ] || exit 1 + [ -f ${lib.escapeShellArg generatedFilesFile} ] || exit 1 + while IFS= read -r file; do + [ -n "$file" ] || continue + [ -f "$file" ] || exit 1 + done < ${lib.escapeShellArg generatedFilesFile} + exit 0 + fi + [ -f ${lib.escapeShellArg stateFile} ] || exit 1 + ${computeGenieStateHash} + current_hash="$(compute_genie_state_hash)" + stored_hash="$(cat ${lib.escapeShellArg stateFile})" + [ "$current_hash" = "$stored_hash" ] ''; }; "genie:watch" = { diff --git a/nix/devenv-modules/tasks/shared/megarepo.nix b/nix/devenv-modules/tasks/shared/megarepo.nix index c6de90ae2..4069019f0 100644 --- a/nix/devenv-modules/tasks/shared/megarepo.nix +++ b/nix/devenv-modules/tasks/shared/megarepo.nix @@ -26,6 +26,19 @@ let trace = import ../lib/trace.nix { inherit lib; }; cliGuard = import ../lib/cli-guard.nix { inherit pkgs; }; jq = "${pkgs.jq}/bin/jq"; + cacheRoot = ".direnv/task-cache/mr-apply"; + membersFile = "${cacheRoot}/members.txt"; + recordWorkspaceMembers = '' + set -o pipefail + mkdir -p ${lib.escapeShellArg cacheRoot} + tmp_members_file="$(mktemp)" + # Rewrite the manifest atomically so a failed `mr ls` never leaves behind + # an empty file that would make the warm-path output proof vacuous. + mr ls --output json \ + | ${jq} -r 'select(._tag == "Success") | .value.members[].name' \ + | LC_ALL=C sort -u > "$tmp_members_file" + mv "$tmp_members_file" ${lib.escapeShellArg membersFile} + ''; # Single-pass jq script that compares megarepo.lock member commits against # a Nix lock file (devenv.lock or flake.lock). Handles multiple inputs @@ -62,6 +75,32 @@ let fi return 0 ''; + mrStatusCheck = '' + # Use the already-installed source CLI here. `nix run ...#megarepo` adds a + # second eval/build hop to every warm status check. + if [ ! -f ./megarepo.kdl ] && [ ! 
-f ./megarepo.json ]; then + exit 0 + fi + + if [ "''${DEVENV_SETUP_OUTER_CACHE_HIT:-0}" = "1" ]; then + [ -d ./repos ] || exit 1 + [ -f ${lib.escapeShellArg membersFile} ] || exit 1 + while IFS= read -r member; do + [ -n "$member" ] || continue + if [ ! -L "./repos/$member" ] && [ ! -d "./repos/$member" ]; then + exit 1 + fi + done < ${lib.escapeShellArg membersFile} + exit 0 + fi + + if [ ! -d ./repos ]; then + exit 1 + fi + + status_json=$(mr status --output json 2>/dev/null) || exit 1 + echo "$status_json" | ${jq} -e '(.workspaceSyncNeeded // false) == false' >/dev/null 2>&1 + ''; tasks = { "mr:sync" = { @@ -73,23 +112,9 @@ let fi mr fetch --apply${if syncAll then " --all" else ""} + ${recordWorkspaceMembers} ''; - # Status: use `mr status --output json` to detect if workspace reconciliation is needed. - status = trace.status "mr:sync" "binary" '' - if [ ! -f ./megarepo.kdl ] && [ ! -f ./megarepo.json ]; then - exit 0 - fi - - # Fast check: if repos/ doesn't exist, definitely need sync - if [ ! 
-d ./repos ]; then - exit 1 - fi - - # Use mr status to check the workspace-specific boolean - status_json=$(nix run "git+file:$PWD#megarepo" -- status --output json 2>/dev/null) || exit 1 - - echo "$status_json" | ${jq} -e '(.workspaceSyncNeeded // false) == false' >/dev/null 2>&1 - ''; + status = trace.status "mr:sync" "binary" mrStatusCheck; }; "mr:lock" = { @@ -124,7 +149,9 @@ let fi mr apply${if syncAll then " --all" else ""} + ${recordWorkspaceMembers} ''; + status = trace.status "mr:apply" "binary" mrStatusCheck; }; "mr:check" = { diff --git a/nix/devenv-modules/tasks/shared/pnpm.nix b/nix/devenv-modules/tasks/shared/pnpm.nix index 09d0840c1..14a7c809b 100644 --- a/nix/devenv-modules/tasks/shared/pnpm.nix +++ b/nix/devenv-modules/tasks/shared/pnpm.nix @@ -32,7 +32,7 @@ let pnpmTaskHelpersScript = pkgs.writeText "pnpm-task-helpers.sh" ( builtins.readFile ./pnpm-task-helpers.sh ); - nodeModulesProjectionHealthScript = pkgs.writeText "check-node-modules-projection-health.cjs" ( + nodeModulesProjectionScript = pkgs.writeText "check-node-modules-projection-health.cjs" ( builtins.readFile ./check-node-modules-projection-health.cjs ); @@ -117,11 +117,25 @@ let gvs_links_dir="$(resolve_gvs_links_dir)" { + # Keep version probes non-interactive even when the parent shell has an + # open stdin pipe, which is common in CI step wrappers. + printf '%s\n' "$(pnpm --version < /dev/null 2>/dev/null | ${pkgs.coreutils}/bin/head -n1 || echo unknown)" printf '%s\n' "$workspace_state_hash" printf '%s\n' "''${gvs_links_dir:-}" } | compute_hash } ''; + computeProjectionStateHashFn = '' + compute_projection_state_hash() { + # Keep the warm-path fingerprint semantics identical while avoiding the + # shell pipeline's per-link process overhead. The helper hashes the same + # ordered line stream that the previous bash implementation produced. 
+ NODE_MODULES_HELPER_MODE="projection-hash" \ + PNPM_ROOT_MODULES_YAML="node_modules/.modules.yaml" \ + NODE_MODULES_DIRS="$(printf '%s\n' node_modules ${nodeModulesPaths})" \ + ${pkgs.nodejs}/bin/node ${lib.escapeShellArg nodeModulesProjectionScript} + } + ''; runPnpmInstallFn = '' run_pnpm_install() { @@ -148,6 +162,7 @@ let # manifests. The fingerprint also includes the active GVS projection # root because pnpm 11 bakes absolute paths into `links/`. hash_file="${cacheRoot}/install-state.hash" + projection_hash_file="${cacheRoot}/projection-state.hash" lockfile="${cacheRoot}/pnpm-install.lock" exec 200>"$lockfile" @@ -161,6 +176,7 @@ let ${computeWorkspaceStateHash} ${computeInstallStateHashFn} + ${computeProjectionStateHashFn} ${runPnpmInstallFn} # pnpm 11 GVS: hash-based link invalidation. pnpm reuses existing GVS @@ -191,7 +207,7 @@ let fi fi - if [ "$_purged_node_modules" != true ] && ! check_node_modules_links_healthy ${pkgs.nodejs}/bin/node ${lib.escapeShellArg nodeModulesProjectionHealthScript} ${healthCheckNodeModulesPaths}; then + if [ "$_purged_node_modules" != true ] && ! check_node_modules_links_healthy ${pkgs.nodejs}/bin/node ${lib.escapeShellArg nodeModulesProjectionScript} ${healthCheckNodeModulesPaths}; then echo "[pnpm] node_modules projection is stale, purging install state" purge_node_modules node_modules ${nodeModulesPaths} fi @@ -205,24 +221,45 @@ let cache_value="$(compute_install_state_hash)" ${cache.writeCacheFile ''"$hash_file"''} + + cache_value="$(compute_projection_state_hash)" + ${cache.writeCacheFile ''"$projection_hash_file"''} ''; status = trace.status "pnpm:install" "hash" '' set -euo pipefail ${loadPnpmTaskHelpersFn} hash_file="${cacheRoot}/install-state.hash" + projection_hash_file="${cacheRoot}/projection-state.hash" - if [ ! -d node_modules ] || [ ! -f pnpm-lock.yaml ] || [ ! -f "$hash_file" ]; then + if [ ! -d node_modules ] || [ ! -f pnpm-lock.yaml ] || [ ! -f "$hash_file" ] || [ ! -f "$projection_hash_file" ] || [ ! 
-f node_modules/.modules.yaml ]; then exit 1 fi + if [ "''${DEVENV_SETUP_OUTER_CACHE_HIT:-0}" = "1" ]; then + # Keep shell entry fast by reusing the cached install-state proof and + # only re-validating the realized projection structure here. The full + # semantic health check still runs in the exec path before install can + # be treated as clean again. + ${computeProjectionStateHashFn} + current_projection_hash="$(compute_projection_state_hash)" + stored_projection_hash="$(cat "$projection_hash_file")" + if [ "$current_projection_hash" != "$stored_projection_hash" ]; then + exit 1 + fi + exit 0 + fi + ${computeWorkspaceStateHash} ${computeInstallStateHashFn} + ${computeProjectionStateHashFn} current_hash="$(compute_install_state_hash)" + current_projection_hash="$(compute_projection_state_hash)" stored_hash="$(cat "$hash_file")" - if ! check_node_modules_links_healthy ${pkgs.nodejs}/bin/node ${lib.escapeShellArg nodeModulesProjectionHealthScript} ${healthCheckNodeModulesPaths}; then + stored_projection_hash="$(cat "$projection_hash_file")" + if [ "$current_hash" != "$stored_hash" ]; then exit 1 fi - if [ "$current_hash" != "$stored_hash" ]; then + if [ "$current_projection_hash" != "$stored_projection_hash" ]; then exit 1 fi exit 0 diff --git a/nix/devenv-modules/tasks/shared/setup.nix b/nix/devenv-modules/tasks/shared/setup.nix index 2fd29762d..e0954d1b4 100644 --- a/nix/devenv-modules/tasks/shared/setup.nix +++ b/nix/devenv-modules/tasks/shared/setup.nix @@ -8,7 +8,7 @@ # imports = [ # (taskModules.setup { # requiredTasks = [ ]; -# optionalTasks = [ "pnpm:install" "genie:run" "ts:emit" ]; +# optionalTasks = [ "pnpm:install" "genie:run" "mr:apply" ]; # completionsCliNames = [ "genie" "mr" ]; # }) # ]; @@ -40,12 +40,16 @@ }: let cliGuard = import ../lib/cli-guard.nix { inherit pkgs; }; + cache = import ../lib/cache.nix { inherit config; }; git = "${pkgs.git}/bin/git"; userRequiredTasks = requiredTasks; userOptionalTasks = optionalTasks; completionsEnabled = 
completionsCliNames != [ ]; completionsTaskName = "setup:completions"; + setupRecordCacheTaskName = "setup:record-cache"; completionsCliList = lib.concatStringsSep " " completionsCliNames; + setupFingerprintFile = cache.mkCachePath "setup-fingerprint"; + setupGitHashFile = cache.mkCachePath "setup-git-hash"; completionsExec = '' shell="" if [ -n "''${FISH_VERSION:-}" ]; then @@ -145,6 +149,138 @@ let setupOptionalTasks = userOptionalTasks ++ lib.optionals completionsEnabled [ completionsTaskName ]; setupTasks = setupRequiredTasks ++ setupOptionalTasks; allSetupTasks = setupTasks; + setupFingerprintEnv = '' + compute_setup_fingerprint() { + resolve_setup_tool_identity() { + _setup_tool="$1" + command -v "$_setup_tool" >/dev/null 2>&1 || return 0 + + _setup_tool_path="$(command -v "$_setup_tool")" + _setup_tool_resolved="$(${pkgs.coreutils}/bin/realpath "$_setup_tool_path" 2>/dev/null || printf '%s\n' "$_setup_tool_path")" + + printf 'tool %s path %s\n' "$_setup_tool" "$_setup_tool_path" + printf 'tool %s resolved %s\n' "$_setup_tool" "$_setup_tool_resolved" + + # Resolved Nix store paths already identify an immutable tool build. For + # mutable shims outside the store, hash the resolved target so upgrades + # still invalidate setup without paying each CLI's startup cost. + if [ -f "$_setup_tool_resolved" ] && [[ "$_setup_tool_resolved" != /nix/store/* ]]; then + printf 'tool %s sha256 %s\n' \ + "$_setup_tool" \ + "$(${pkgs.coreutils}/bin/sha256sum "$_setup_tool_resolved" | awk '{print $1}')" + fi + } + + # This outer fingerprint exists because devenv's built-in `status` + # semantics do not prune a dependency subtree: the scheduler only runs a + # task's status command once that task itself is ready to execute, after + # its upstream dependencies have already been traversed. A cached + # aggregate `devenv:enterShell` task therefore would not avoid the warm + # `pnpm:install` / `genie:run` / `mr:apply` status probes we are trying + # to skip. 
+ # + # We also intentionally avoid `execIfModified` here. Upstream has fixed + # several correctness and performance bugs in that path, but it is still + # the wrong primitive for repo bootstrap: setup invalidation depends on + # generated-file drift, lockfile topology, and shell/task exports rather + # than just a watched file set. Relevant upstream history: + # - #1924: enterShell + execIfModified caching was confusing for exports + # - #2422 / #2469 / #2588: glob walking could explode through node_modules + # - #2577: deletion/removal invalidation needed fixes + # Use git object IDs for tracked inputs and only content-hash dirty files. + # That keeps the warm-shell fingerprint cheap while still reacting to + # untracked/generated drift that git object IDs cannot describe. + _setup_head=$(${git} rev-parse HEAD 2>/dev/null || echo "no-git") + _setup_generated_from_head=$( + ${git} grep -l -E '^// Source: .*\.genie\.ts|^# Source: .*\.genie\.ts' HEAD -- . 2>/dev/null || true + ) + _setup_dirty_files=$( + { + ${git} -c core.quotepath=off ls-files \ + --modified \ + --others \ + --exclude-standard \ + --deduplicate \ + -- \ + ':(glob)**/*.genie.ts' \ + ':(glob)**/package.json' 2>/dev/null || true + + for _setup_file in package.json pnpm-workspace.yaml pnpm-lock.yaml .npmrc megarepo.kdl megarepo.json megarepo.lock; do + if [ -f "$_setup_file" ] && ! ${git} ls-files --error-unmatch -- "$_setup_file" >/dev/null 2>&1; then + printf '%s\n' "$_setup_file" + elif ! ${git} diff --quiet -- "$_setup_file" 2>/dev/null; then + printf '%s\n' "$_setup_file" + fi + done + + printf '%s\n' "$_setup_generated_from_head" \ + | while IFS= read -r _setup_file; do + [ -n "$_setup_file" ] || continue + if [ ! -e "$_setup_file" ] || ! 
${git} diff --quiet -- "$_setup_file" 2>/dev/null; then + printf '%s\n' "$_setup_file" + fi + done + } | LC_ALL=C sort -u + ) + + { + printf 'head %s\n' "$_setup_head" + + # Shell-entry tasks can short-circuit to lightweight output checks once + # the repo inputs are unchanged. Include the task tool identities here so + # changing the active pnpm/genie/mr binary still invalidates the outer + # cache and forces the next shell to re-validate or refresh setup. + for _setup_tool in pnpm genie mr; do + resolve_setup_tool_identity "$_setup_tool" + done + + for _setup_file in package.json pnpm-workspace.yaml pnpm-lock.yaml .npmrc megarepo.kdl megarepo.json megarepo.lock; do + ${git} ls-files -s -- "$_setup_file" 2>/dev/null || true + done + + ${git} -c core.quotepath=off ls-files -s -- ':(glob)**/*.genie.ts' ':(glob)**/package.json' 2>/dev/null || true + + printf '%s\n' "$_setup_generated_from_head" \ + | while IFS= read -r _setup_file; do + [ -n "$_setup_file" ] || continue + ${git} ls-files -s -- "$_setup_file" 2>/dev/null || true + done + + printf '%s\n' "$_setup_dirty_files" \ + | while IFS= read -r _setup_file; do + [ -n "$_setup_file" ] || continue + if [ -f "$_setup_file" ]; then + printf 'dirty %s\n' "$_setup_file" + ${pkgs.coreutils}/bin/sha256sum "$_setup_file" | awk '{print $1}' + else + printf 'missing %s\n' "$_setup_file" + fi + done + } \ + | LC_ALL=C sort -u \ + | ${pkgs.coreutils}/bin/sha256sum \ + | awk '{print $1}' + } + + setup_outer_cache_hit() { + _setup_current_fingerprint="$1" + + if [ "''${FORCE_SETUP:-}" = "1" ]; then + return 1 + fi + + if [ ! 
-f ${lib.escapeShellArg setupFingerprintFile} ]; then
+      return 1
+    fi
+
+    _setup_cached_fingerprint=$(cat ${lib.escapeShellArg setupFingerprintFile} 2>/dev/null || echo "")
+    if [ "$_setup_current_fingerprint" != "$_setup_cached_fingerprint" ]; then
+      return 1
+    fi
+
+    return 0
+  }
+  '';
 in
 {
   tasks = cliGuard.stripGuards (
@@ -169,16 +305,24 @@ in
     # Gate task that fails during rebase, causing dependent tasks to skip.
     # Uses `before` to inject itself as a dependency of each setup task.
     #
-    # OTEL trace propagation:
-    # Generates a W3C TRACEPARENT and propagates it to dependent tasks via
-    # devenv's native task output → env mechanism (devenv.env convention).
-    # When a task writes {"devenv":{"env":{"KEY":"VAL"}}} to $DEVENV_TASK_OUTPUT_FILE,
-    # devenv injects those as env vars into all subsequent task subprocesses.
-    # Ref: https://github.com/cachix/devenv/blob/main/devenv-tasks/src/task_state.rs#L134-L154
-    # Ref: https://devenv.sh/tasks/ (Task Inputs and Outputs)
+    # The gate exports its computed cache metadata through devenv's native
+    # task export channel so every dependent status/exec sees the same
+    # `DEVENV_SETUP_*` values without re-running the fingerprint logic.
+    # This keeps us aligned with upstream task plumbing instead of carrying a
+    # parallel ad-hoc output protocol in this repo.
     "setup:gate" = lib.mkIf skipDuringRebase {
       description = "Check if setup should run (fails during rebase to skip setup)";
+      exports = [
+        "DEVENV_SETUP_OUTER_CACHE_HIT"
+        "DEVENV_SETUP_FINGERPRINT"
+        "DEVENV_SETUP_GIT_HASH"
+        "TRACEPARENT"
+        "OTEL_SHELL_ENTRY_NS"
+      ];
       exec = ''
+        set -euo pipefail
+        ${setupFingerprintEnv}
+
-        _git_dir=$(${git} rev-parse --git-dir 2>/dev/null)
+        _git_dir=$(${git} rev-parse --git-dir 2>/dev/null || true) # outside a git work tree this stays empty instead of aborting the gate under set -e
         if [ -d "$_git_dir/rebase-merge" ] || [ -d "$_git_dir/rebase-apply" ]; then
           echo "Skipping setup during git rebase/cherry-pick"
@@ -186,16 +330,23 @@ in
           exit 1
         fi
-        # Generate root trace context and propagate via devenv task output.
- # Dependent tasks automatically receive TRACEPARENT + OTEL_SHELL_ENTRY_NS - # as env vars, linking all shell entry spans into a single trace. - if [ -n "''${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ] && [ -n "''${DEVENV_TASK_OUTPUT_FILE:-}" ]; then + _setup_current_fingerprint="$(compute_setup_fingerprint)" + _setup_git_hash=$(${git} rev-parse HEAD 2>/dev/null || echo "no-git") + if setup_outer_cache_hit "$_setup_current_fingerprint"; then + _setup_outer_cache_hit="1" + else + _setup_outer_cache_hit="0" + fi + + export DEVENV_SETUP_OUTER_CACHE_HIT="$_setup_outer_cache_hit" + export DEVENV_SETUP_FINGERPRINT="$_setup_current_fingerprint" + export DEVENV_SETUP_GIT_HASH="$_setup_git_hash" + + if [ -n "''${OTEL_EXPORTER_OTLP_ENDPOINT:-}" ]; then _root_trace=$(${pkgs.coreutils}/bin/od -An -tx1 -N16 /dev/urandom | tr -d ' \n') _root_span=$(${pkgs.coreutils}/bin/od -An -tx1 -N8 /dev/urandom | tr -d ' \n') - _tp="00-''${_root_trace:0:32}-''${_root_span:0:16}-01" - _now_ns=$(${pkgs.coreutils}/bin/date +%s%N) - printf '{"devenv":{"env":{"TRACEPARENT":"%s","OTEL_SHELL_ENTRY_NS":"%s"}}}' \ - "$_tp" "$_now_ns" > "$DEVENV_TASK_OUTPUT_FILE" + export TRACEPARENT="00-''${_root_trace:0:32}-''${_root_span:0:16}-01" + export OTEL_SHELL_ENTRY_NS="$(${pkgs.coreutils}/bin/date +%s%N)" fi ''; # This makes setup:gate run BEFORE each setup task @@ -203,12 +354,42 @@ in before = allSetupTasks; }; + "${setupRecordCacheTaskName}" = lib.mkIf (setupTasks != [ ]) { + description = "Record the successful setup fingerprint"; + # Persist the outer cache only after the setup tasks finished. Writing it + # earlier would let later warm shells skip work that never completed. 
+ after = lib.optionals skipDuringRebase [ "setup:gate" ] ++ setupTasks; + exec = '' + set -euo pipefail + ${setupFingerprintEnv} + + mkdir -p ${lib.escapeShellArg cache.cacheRoot} + + cache_value="''${DEVENV_SETUP_FINGERPRINT:-$(compute_setup_fingerprint)}" + ${cache.writeCacheFile ''"${setupFingerprintFile}"''} + + cache_value="''${DEVENV_SETUP_GIT_HASH:-$(${git} rev-parse HEAD 2>/dev/null || echo "no-git")}" + ${cache.writeCacheFile ''"${setupGitHashFile}"''} + ''; + status = '' + set -euo pipefail + if [ "''${FORCE_SETUP:-}" = "1" ]; then + exit 1 + fi + if [ "''${DEVENV_SETUP_OUTER_CACHE_HIT:-0}" = "1" ]; then + exit 0 + fi + exit 1 + ''; + }; + # Wire setup tasks to run during shell entry. # Required tasks are hard dependencies; optional tasks use @completed so # failures don't block shell entry. "devenv:enterShell" = { after = setupRequiredTasks - ++ (map (t: "${t}@completed") setupOptionalTasks); + ++ (map (t: "${t}@completed") setupOptionalTasks) + ++ lib.optionals (setupTasks != [ ]) [ "${setupRecordCacheTaskName}@completed" ]; }; # Run setup tasks explicitly. diff --git a/nix/devenv-modules/tasks/shared/tests/pnpm-task-smoke.test.sh b/nix/devenv-modules/tasks/shared/tests/pnpm-task-smoke.test.sh index 24cc86833..9bd15ca73 100644 --- a/nix/devenv-modules/tasks/shared/tests/pnpm-task-smoke.test.sh +++ b/nix/devenv-modules/tasks/shared/tests/pnpm-task-smoke.test.sh @@ -132,8 +132,20 @@ if [ "${1:-}" = "--version" ]; then exit 0 fi if [ "${1:-}" = "install" ]; then - mkdir -p node_modules + mkdir -p node_modules vendor/pkg-v1 touch node_modules/.install-ok + printf '{"name":"pkg","version":"1.0.0"}\n' > vendor/pkg-v1/package.json + ln -snf ../vendor/pkg-v1 node_modules/pkg + # The warm-path status now fingerprints the root projection metadata that + # pnpm always writes on a real install. Keep the smoke fixture aligned with + # that contract so the test still exercises the task logic instead of + # failing on an unrealistically incomplete fake install. 
+ cat > node_modules/.modules.yaml <<'YAML' +hoistPattern: [] +nodeLinker: isolated +storeDir: /tmp/fake-pnpm-store +virtualStoreDir: node_modules/.pnpm +YAML exit 0 fi echo "unexpected fake pnpm invocation: $*" >&2 @@ -218,7 +230,9 @@ echo "Test 2: exec runs fake pnpm and populates cache" export PNPM_HOME="$workspace/.pnpm-home-a" bash "$tmpdir/pnpm-install.exec.sh" test -f "$workspace/.direnv/task-cache/pnpm-install/install-state.hash" + test -f "$workspace/.direnv/task-cache/pnpm-install/projection-state.hash" test -d "$workspace/node_modules" + test -f "$workspace/node_modules/.modules.yaml" ) echo "Test 3: status hits after install with same GVS path" @@ -233,7 +247,61 @@ echo "Test 3: status hits after install with same GVS path" assert_exit_code 0 "$exit_code" "status should hit after install" ) -echo "Test 4: status misses after effective GVS path changes" +echo "Test 4: outer cache hit still misses when projection metadata is missing" +( + cd "$workspace" + export HOME="$tmpdir/home" + export PNPM_HOME="$workspace/.pnpm-home-a" + export DEVENV_SETUP_OUTER_CACHE_HIT=1 + rm -f node_modules/.modules.yaml + set +e + bash "$tmpdir/pnpm-install.status.sh" + exit_code=$? + set -e + assert_exit_code 1 "$exit_code" "outer-hit status should miss when .modules.yaml is missing" +) + +echo "Test 5: exec restores projection metadata after a miss" +( + cd "$workspace" + export HOME="$tmpdir/home" + export PNPM_HOME="$workspace/.pnpm-home-a" + bash "$tmpdir/pnpm-install.exec.sh" + test -f "$workspace/node_modules/.modules.yaml" +) + +echo "Test 6: outer cache hit misses when a projected package symlink breaks" +( + cd "$workspace" + export HOME="$tmpdir/home" + export PNPM_HOME="$workspace/.pnpm-home-a" + export DEVENV_SETUP_OUTER_CACHE_HIT=1 + mkdir -p node_modules/@scope + ln -s ../missing-package node_modules/@scope/broken + set +e + bash "$tmpdir/pnpm-install.status.sh" + exit_code=$? 
+ set -e + assert_exit_code 1 "$exit_code" "outer-hit status should miss when a projected symlink is broken" + rm node_modules/@scope/broken +) + +echo "Test 7: outer cache hit misses when a projected symlink disappears" +( + cd "$workspace" + export HOME="$tmpdir/home" + export PNPM_HOME="$workspace/.pnpm-home-a" + export DEVENV_SETUP_OUTER_CACHE_HIT=1 + bash "$tmpdir/pnpm-install.exec.sh" + rm -f node_modules/pkg + set +e + bash "$tmpdir/pnpm-install.status.sh" + exit_code=$? + set -e + assert_exit_code 1 "$exit_code" "outer-hit status should miss when a projected symlink disappears" +) + +echo "Test 8: status misses after effective GVS path changes" ( cd "$workspace" export HOME="$tmpdir/home" @@ -245,11 +313,11 @@ echo "Test 4: status misses after effective GVS path changes" assert_exit_code 1 "$exit_code" "status should miss when GVS path changes" ) -echo "Test 5: exec invoked pnpm version and install" +echo "Test 9: exec invoked pnpm version and install" grep -qxF -- "--version" "$tmpdir/pnpm.log" grep -q "^install " "$tmpdir/pnpm.log" -echo "Test 6: exec detaches stdin before probing pnpm version" +echo "Test 10: exec detaches stdin before probing pnpm version" ( cd "$workspace" export HOME="$tmpdir/home" @@ -269,19 +337,18 @@ echo "Test 6: exec detaches stdin before probing pnpm version" ) grep -qxF -- "--version" "$tmpdir/pnpm.log" -echo "Test 7: generated test task runs vitest without pnpm exec" +echo "Test 11: generated test task runs vitest without pnpm exec" ( cd "$workspace/packages/demo" output="$(bash "$tmpdir/test-demo.exec.sh")" [ "$output" = "vitest-shim:run" ] ) -echo "Test 8: generated storybook task runs storybook without pnpm exec" +echo "Test 12: generated storybook task runs storybook without pnpm exec" ( cd "$workspace/packages/demo" output="$(bash "$tmpdir/storybook-demo.exec.sh")" [ "$output" = "storybook-shim:build" ] ) - echo "" echo "pnpm task smoke test passed" diff --git a/nix/devenv-modules/tasks/shared/tests/setup-cache.test.sh 
b/nix/devenv-modules/tasks/shared/tests/setup-cache.test.sh index a8ee300fa..bb7dce413 100755 --- a/nix/devenv-modules/tasks/shared/tests/setup-cache.test.sh +++ b/nix/devenv-modules/tasks/shared/tests/setup-cache.test.sh @@ -1,18 +1,11 @@ #!/usr/bin/env bash -# Tests for setup.nix git hash caching with inner cache awareness +# Tests for setup.nix outer setup fingerprint caching. # -# Validates the two-tier caching design (R5, R11 compliance): -# - Outer tier: git hash -# - Inner tier: per-task content caches (e.g., pnpm-install/*.hash) -# -# Tasks should only be skipped when BOTH tiers are valid. -# If innerCacheDirs is empty, inner cache check is skipped (git-hash-only mode). +# The outer cache only answers whether shell-entry inputs changed. Task-local +# status checks own output validation, so this test intentionally stays focused +# on fingerprint persistence and FORCE_SETUP behavior. set -euo pipefail -# ============================================================================ -# Test helpers -# ============================================================================ - assert_exit_code() { local expected="$1" local actual="$2" @@ -27,194 +20,147 @@ assert_exit_code() { echo " ok: $label" } -# ============================================================================ -# Simulate the gitHashStatus function from setup.nix -# This mirrors the logic so we can test it in isolation -# ============================================================================ +simulate_setup_outer_cache_hit() { + local fingerprint_file="$1" + local current_fingerprint="$2" + local force_setup="${3-}" # Explicit parameter only, ignore env var for testing -simulate_git_hash_status() { - local hash_file="$1" - local cache_root="$2" - local current_hash="$3" - local force_setup="${4-}" # Explicit parameter only, ignore env var for testing - local inner_cache_dirs="${5-pnpm-install}" # space-separated list, empty = git-hash-only - - # Allow bypass via force_setup parameter (NOT 
env var - env var breaks CI tests)
   [ "$force_setup" = "1" ] && return 1
 
   local cached
-  cached=$(cat "$hash_file" 2>/dev/null || echo "")
+  cached=$(cat "$fingerprint_file" 2>/dev/null || echo "")
+  [ "$current_fingerprint" = "$cached" ]
+}
 
-  # If git hash differs, always run
-  if [ "$current_hash" != "$cached" ]; then
-    return 1
-  fi
+simulate_tool_identity() {
+  local tool_name="$1"
+  local tool_path="$2"
+  local resolved_path # filled by python3 below: pathlib needs Python 3, and a bare python may be Python 2 or absent
 
-  # If no inner cache dirs configured, use git-hash-only mode
-  if [ -z "$inner_cache_dirs" ]; then
-    return 0
-  fi
+  resolved_path=$(python3 - <<'PY' "$tool_path"
+import pathlib
+import sys
 
-  # Check each configured inner cache dir for *.hash files
-  for dir_name in $inner_cache_dirs; do
-    local cache_dir="$cache_root/$dir_name"
-    # Directory must exist and contain at least one .hash file
-    if [ -d "$cache_dir" ]; then
-      # Simple and reliable: iterate over files and check suffix
-      for f in "$cache_dir"/*; do
-        case "$f" in
-          *.hash)
-            [ -f "$f" ] && return 0
-            ;;
-        esac
-      done
-    fi
-  done
+print(pathlib.Path(sys.argv[1]).resolve())
+PY
+)
 
-  # No valid inner caches found - run to populate them
-  return 1
-}
+  {
+    printf 'tool %s path %s\n' "$tool_name" "$tool_path"
+    printf 'tool %s resolved %s\n' "$tool_name" "$resolved_path"
 
-# ============================================================================
-# Test cases
-# ============================================================================
+    if [ -f "$resolved_path" ] && [[ "$resolved_path" != /nix/store/* ]]; then
+      printf 'tool %s sha256 %s\n' "$tool_name" "$(shasum -a 256 "$resolved_path" | awk '{print $1}')"
+    fi
+  } | shasum -a 256 | awk '{print $1}'
+}
 
 echo "Running setup-cache tests..."
echo "" -# Create temp directory structure test_dir=$(mktemp -d) trap 'rm -rf "$test_dir"' EXIT cache_root="$test_dir/.direnv/task-cache" -hash_file="$cache_root/setup-git-hash" -pnpm_cache_dir="$cache_root/pnpm-install" +fingerprint_file="$cache_root/setup-fingerprint" mkdir -p "$cache_root" -# Test 1: Fresh cache (no git hash file) -> should return 1 (run) -echo "Test 1: Fresh cache (no git hash file)" +echo "Test 1: Fresh cache (no fingerprint file)" set +e -simulate_git_hash_status "$hash_file" "$cache_root" "abc123" +simulate_setup_outer_cache_hit "$fingerprint_file" "abc123" exit_code=$? set -e assert_exit_code 1 "$exit_code" "fresh cache returns 1 (needs to run)" -# Test 2: Matching git hash but NO inner caches -> should return 1 (run) -echo "" -echo "Test 2: Matching git hash but no inner caches" -echo "abc123" > "$hash_file" -set +e -simulate_git_hash_status "$hash_file" "$cache_root" "abc123" -exit_code=$? -set -e -assert_exit_code 1 "$exit_code" "matching hash + no inner caches returns 1 (run to populate)" - -# Test 3: Matching git hash AND inner caches with .hash files -> should return 0 (skip) echo "" -echo "Test 3: Matching git hash + inner caches with .hash files" -mkdir -p "$pnpm_cache_dir" -echo "somehash" > "$pnpm_cache_dir/genie.hash" +echo "Test 2: Matching fingerprint" +echo "abc123" > "$fingerprint_file" set +e -simulate_git_hash_status "$hash_file" "$cache_root" "abc123" +simulate_setup_outer_cache_hit "$fingerprint_file" "abc123" exit_code=$? set -e -assert_exit_code 0 "$exit_code" "matching hash + .hash files returns 0 (skip)" +assert_exit_code 0 "$exit_code" "matching fingerprint returns 0 (skip)" -# Test 4: Different git hash -> should return 1 (run) even with inner caches echo "" -echo "Test 4: Different git hash (inner caches exist)" +echo "Test 3: Different fingerprint" set +e -simulate_git_hash_status "$hash_file" "$cache_root" "def456" +simulate_setup_outer_cache_hit "$fingerprint_file" "def456" exit_code=$? 
set -e -assert_exit_code 1 "$exit_code" "different hash returns 1 (needs to run)" +assert_exit_code 1 "$exit_code" "different fingerprint returns 1 (needs to run)" -# Test 5: FORCE_SETUP=1 -> should return 1 (run) regardless of cache state echo "" -echo "Test 5: FORCE_SETUP=1 bypasses cache" +echo "Test 4: FORCE_SETUP=1 bypasses cache" set +e -simulate_git_hash_status "$hash_file" "$cache_root" "abc123" "1" +simulate_setup_outer_cache_hit "$fingerprint_file" "abc123" "1" exit_code=$? set -e assert_exit_code 1 "$exit_code" "FORCE_SETUP=1 returns 1 (always run)" -# Test 6: Empty inner cache directory -> should return 1 (run) echo "" -echo "Test 6: Empty inner cache directory" -rm -f "$pnpm_cache_dir"/* +echo "Test 5: Empty fingerprint file" +: > "$fingerprint_file" set +e -simulate_git_hash_status "$hash_file" "$cache_root" "abc123" +simulate_setup_outer_cache_hit "$fingerprint_file" "abc123" exit_code=$? set -e -assert_exit_code 1 "$exit_code" "empty inner cache dir returns 1 (run to populate)" +assert_exit_code 1 "$exit_code" "empty fingerprint file returns 1 (needs to run)" -# Test 7: Inner cache with multiple .hash files -> should return 0 (skip) echo "" -echo "Test 7: Multiple inner cache .hash files" -echo "hash1" > "$pnpm_cache_dir/genie.hash" -echo "hash2" > "$pnpm_cache_dir/megarepo.hash" -echo "hash3" > "$pnpm_cache_dir/utils.hash" +echo "Test 6: Trailing newline in cache file still matches" +printf 'abc123\n' > "$fingerprint_file" set +e -simulate_git_hash_status "$hash_file" "$cache_root" "abc123" +simulate_setup_outer_cache_hit "$fingerprint_file" "abc123" exit_code=$? 
set -e -assert_exit_code 0 "$exit_code" "multiple .hash files returns 0 (skip)" +assert_exit_code 0 "$exit_code" "cached newline-trimmed fingerprint returns 0 (skip)" -# Test 8: Inner cache with only non-.hash files -> should return 1 (run) echo "" -echo "Test 8: Inner cache with only non-.hash files (false positive prevention)" -rm -f "$pnpm_cache_dir"/* -echo "not a hash" > "$pnpm_cache_dir/some.lock" -echo "also not" > "$pnpm_cache_dir/partial.tmp" +echo "Test 7: Similar but different fingerprint does not false-hit" set +e -simulate_git_hash_status "$hash_file" "$cache_root" "abc123" +simulate_setup_outer_cache_hit "$fingerprint_file" "abc1234" exit_code=$? set -e -assert_exit_code 1 "$exit_code" "non-.hash files returns 1 (run to populate proper caches)" +assert_exit_code 1 "$exit_code" "different fingerprint text returns 1 (needs to run)" -# Test 9: Git-hash-only mode (empty innerCacheDirs) -> should return 0 when hash matches echo "" -echo "Test 9: Git-hash-only mode (innerCacheDirs='')" -rm -rf "$pnpm_cache_dir" # Remove inner caches entirely -set +e -simulate_git_hash_status "$hash_file" "$cache_root" "abc123" "" "" -exit_code=$? -set -e -assert_exit_code 0 "$exit_code" "git-hash-only mode returns 0 when hash matches" - -# Test 10: Git-hash-only mode with different hash -> should return 1 (run) -echo "" -echo "Test 10: Git-hash-only mode with different hash" -set +e -simulate_git_hash_status "$hash_file" "$cache_root" "xyz999" "" "" -exit_code=$? 
-set -e -assert_exit_code 1 "$exit_code" "git-hash-only mode returns 1 when hash differs" +echo "Test 8: Mutable tool target content invalidates fingerprint" +tool_dir="$test_dir/tool" +mkdir -p "$tool_dir/bin" "$tool_dir/pkg-v1" "$tool_dir/pkg-v2" +printf 'echo v1\n' > "$tool_dir/pkg-v1/tool" +printf 'echo v2\n' > "$tool_dir/pkg-v2/tool" +chmod +x "$tool_dir/pkg-v1/tool" "$tool_dir/pkg-v2/tool" +ln -s ../pkg-v1/tool "$tool_dir/bin/tool" + +tool_fp_v1=$(simulate_tool_identity tool "$tool_dir/bin/tool") +ln -sf ../pkg-v2/tool "$tool_dir/bin/tool" +tool_fp_v2=$(simulate_tool_identity tool "$tool_dir/bin/tool") + +if [ "$tool_fp_v1" = "$tool_fp_v2" ]; then + echo "FAIL: retargeting mutable tool should change fingerprint" + exit 1 +fi +echo " ok: retargeting mutable tool changes fingerprint" -# Test 11: Multiple inner cache dirs, only one has .hash files -> should return 0 (skip) echo "" -echo "Test 11: Multiple inner cache dirs, partial population" -mkdir -p "$pnpm_cache_dir" -mkdir -p "$cache_root/other-cache" -echo "hash1" > "$pnpm_cache_dir/genie.hash" -# other-cache has no .hash files -set +e -simulate_git_hash_status "$hash_file" "$cache_root" "abc123" "" "pnpm-install other-cache" -exit_code=$? -set -e -assert_exit_code 0 "$exit_code" "at least one valid inner cache returns 0 (skip)" - -# Test 12: Multiple inner cache dirs, none have .hash files -> should return 1 (run) -echo "" -echo "Test 12: Multiple inner cache dirs, none populated" -rm -f "$pnpm_cache_dir"/*.hash -echo "not a hash" > "$cache_root/other-cache/lock.file" -set +e -simulate_git_hash_status "$hash_file" "$cache_root" "abc123" "" "pnpm-install other-cache" -exit_code=$? 
-set -e -assert_exit_code 1 "$exit_code" "no valid inner caches returns 1 (run)" +echo "Test 9: Nix store style tool path fingerprints by resolved path" +store_dir="$test_dir/nix/store/hash-demo-tool/bin" +mkdir -p "$store_dir" +printf 'echo store-tool\n' > "$store_dir/tool" +chmod +x "$store_dir/tool" +ln -s "$store_dir/tool" "$tool_dir/bin/store-tool" + +store_fp_1=$(simulate_tool_identity store-tool "$tool_dir/bin/store-tool") +mv "$test_dir/nix/store/hash-demo-tool" "$test_dir/nix/store/hash-demo-tool-2" +ln -sf "$test_dir/nix/store/hash-demo-tool-2/bin/tool" "$tool_dir/bin/store-tool" +store_fp_2=$(simulate_tool_identity store-tool "$tool_dir/bin/store-tool") + +if [ "$store_fp_1" = "$store_fp_2" ]; then + echo "FAIL: changing resolved store path should change fingerprint" + exit 1 +fi +echo " ok: resolved store path change invalidates fingerprint" echo "" echo "All setup-cache tests passed" diff --git a/nix/devenv-modules/tasks/shared/tests/ts-task-smoke.test.sh b/nix/devenv-modules/tasks/shared/tests/ts-task-smoke.test.sh new file mode 100644 index 000000000..a7b17ec35 --- /dev/null +++ b/nix/devenv-modules/tasks/shared/tests/ts-task-smoke.test.sh @@ -0,0 +1,232 @@ +#!/usr/bin/env bash +set -euo pipefail + +TESTS_DIR="$(cd "$(dirname "$0")" && pwd)" +ROOT="$(cd "$TESTS_DIR/../../../../.." && pwd)" + +assert_exit_code() { + local expected="$1" + local actual="$2" + local label="$3" + + if [ "$expected" != "$actual" ]; then + echo "FAIL: $label" + echo " expected exit code: $expected" + echo " actual exit code: $actual" + exit 1 + fi +} + +extract_ts_emit_script() { + local attr="$1" + local output_path="$2" + + nix eval --impure --raw --expr " + let + flake = builtins.getFlake (toString $ROOT); + pkgs = import flake.inputs.nixpkgs { system = builtins.currentSystem; }; + evaluated = pkgs.lib.evalModules { + modules = [ + ({ ... 
}: { + options.tasks = pkgs.lib.mkOption { type = pkgs.lib.types.attrsOf pkgs.lib.types.anything; default = { }; }; + options.processes = pkgs.lib.mkOption { type = pkgs.lib.types.attrsOf pkgs.lib.types.anything; default = { }; }; + options.packages = pkgs.lib.mkOption { type = pkgs.lib.types.listOf pkgs.lib.types.anything; default = [ ]; }; + }) + ((import $ROOT/nix/devenv-modules/tasks/shared/ts.nix { + tsconfigFile = \"tsconfig.all.json\"; + tscBin = \"tsc\"; + }) { + pkgs = pkgs; + lib = pkgs.lib; + config = { }; + }) + ]; + }; + in evaluated.config.tasks.\"ts:emit\".${attr} + " > "$output_path" + chmod +x "$output_path" +} + +echo "Running ts task smoke test..." +echo "" + +tmpdir="$(mktemp -d)" +trap 'rm -rf "$tmpdir"' EXIT + +workspace="$tmpdir/workspace" +mkdir -p \ + "$workspace/node_modules/typescript" \ + "$workspace/packages/no-emit" \ + "$workspace/packages/emit" \ + "$tmpdir/bin" + +cat > "$workspace/tsconfig.all.json" <<'EOF' +{ + // Root-level comment should be ignored + "files": [], + "references": [ + { "path": "packages/no-emit/tsconfig.json" }, // explicit file path + // This mid-file comment used to break the old JSON.parse path. + { "path": "packages/emit" } + ] +} +EOF + +cat > "$workspace/packages/no-emit/tsconfig.json" <<'EOF' +{ + "compilerOptions": { + // This comment is intentionally mid-file. + "composite": true, + "noEmit": true + } +} +EOF + +cat > "$workspace/packages/emit/tsconfig.json" <<'EOF' +{ + "compilerOptions": { + "composite": true, + // Keep this project in the emit graph. 
+    "declaration": true
+  }
+}
+EOF
+
+cat > "$workspace/node_modules/typescript/package.json" <<'EOF'
+{"name":"typescript","main":"./index.js"}
+EOF
+
+cat > "$workspace/node_modules/typescript/index.js" <<'EOF'
+const stripLineComments = (source) => {
+  let result = ''
+  let inString = false
+  let escaped = false
+
+  for (let index = 0; index < source.length; index += 1) {
+    const char = source[index]
+    const next = source[index + 1]
+
+    if (inString) {
+      result += char
+      if (escaped) {
+        escaped = false
+      } else if (char === '\\') { // one backslash; the heredoc is quoted, so the JS source must not double-escape it
+        escaped = true
+      } else if (char === '"') {
+        inString = false
+      }
+      continue
+    }
+
+    if (char === '"') {
+      inString = true
+      result += char
+      continue
+    }
+
+    if (char === '/' && next === '/') {
+      while (index < source.length && source[index] !== '\n') {
+        index += 1
+      }
+      if (index < source.length) {
+        result += '\n'
+      }
+      continue
+    }
+
+    result += char
+  }
+
+  return result
+}
+
+const parseJsonc = (source) =>
+  JSON.parse(
+    stripLineComments(source)
+      .replace(/\/\*[\s\S]*?\*\//g, '')
+      .replace(/,\s*([}\]])/g, '$1')
+  )
+
+exports.readConfigFile = (filePath, readFile) => {
+  try {
+    return { config: parseJsonc(readFile(filePath)) }
+  } catch (error) {
+    return { error: { messageText: String(error.message ??
error) } } + } +} +EOF + +cat > "$tmpdir/bin/tsc" <<'EOF' +#!/usr/bin/env bash +set -euo pipefail +printf '%s\n' "$*" >> "${TEST_TSC_LOG:?}" + +config_path="" +prev="" +for arg in "$@"; do + if [ "$prev" = "--build" ]; then + config_path="$arg" + break + fi + prev="$arg" +done + +if [ -z "$config_path" ]; then + echo "missing --build tsconfig path" >&2 + exit 1 +fi + +TEST_CAPTURED_TSCONFIG="${TEST_CAPTURED_TSCONFIG:?}" \ +node - "$config_path" <<'NODE' +const fs = require('node:fs') + +const [configPath] = process.argv.slice(2) +const config = JSON.parse(fs.readFileSync(configPath, 'utf8')) + +if (!Array.isArray(config.references)) { + throw new Error('references missing from generated emit tsconfig') +} + +const paths = config.references.map((reference) => reference.path) +if (paths.includes('packages/no-emit/tsconfig.json')) { + throw new Error('noEmit project should be removed from generated emit tsconfig') +} +if (!paths.includes('packages/emit')) { + throw new Error('emit project should remain in generated emit tsconfig') +} + +fs.copyFileSync(configPath, process.env.TEST_CAPTURED_TSCONFIG) +NODE +EOF +chmod +x "$tmpdir/bin/tsc" + +extract_ts_emit_script "exec" "$tmpdir/ts-emit.exec.sh" +extract_ts_emit_script "status" "$tmpdir/ts-emit.status.sh" + +export PATH="$tmpdir/bin:$PATH" +export TEST_TSC_LOG="$tmpdir/tsc.log" +export TEST_CAPTURED_TSCONFIG="$tmpdir/captured-tsconfig.json" + +echo "Test 1: ts:emit exec filters noEmit refs even with inline comments" +( + cd "$workspace" + bash "$tmpdir/ts-emit.exec.sh" +) +test -f "$TEST_CAPTURED_TSCONFIG" + +echo "Test 2: ts:emit status uses the same filtered graph" +( + cd "$workspace" + : > "$TEST_TSC_LOG" + rm -f "$TEST_CAPTURED_TSCONFIG" + set +e + bash "$tmpdir/ts-emit.status.sh" + exit_code=$? 
+ set -e + assert_exit_code 0 "$exit_code" "ts:emit status should succeed for an already-clean filtered graph" +) +test -f "$TEST_CAPTURED_TSCONFIG" +grep -q -- '--dry --noCheck --verbose --pretty false' "$TEST_TSC_LOG" + +echo "" +echo "ts task smoke test passed" diff --git a/nix/devenv-modules/tasks/shared/ts.nix b/nix/devenv-modules/tasks/shared/ts.nix index 9a76b730e..4379e6962 100644 --- a/nix/devenv-modules/tasks/shared/ts.nix +++ b/nix/devenv-modules/tasks/shared/ts.nix @@ -38,6 +38,66 @@ let trace = import ../lib/trace.nix { inherit lib; }; cliGuard = import ../lib/cli-guard.nix { inherit pkgs; }; + emitTsconfigHelper = '' + generate_emit_tsconfig() { + local source_tsconfig="$1" + local target_tsconfig="$2" + + # `tsc --build --dry --noCheck` still treats `noEmit` references as emit + # work, which made `ts:emit` look perpetually stale. Build a filtered + # graph just for this task instead of mutating the checked-in config. + ${pkgs.nodejs}/bin/node - "$source_tsconfig" "$target_tsconfig" <<'NODE' +const fs = require('node:fs') +const path = require('node:path') + +const [sourceTsconfig, targetTsconfig] = process.argv.slice(2) + +const loadTypescript = () => { + try { + return require(require.resolve('typescript', { paths: [path.dirname(sourceTsconfig), process.cwd()] })) + } catch (error) { + throw new Error( + 'Unable to resolve TypeScript while preparing ts:emit: ' + + String(error?.message ?? error) + ) + } +} + +const typescript = loadTypescript() + +const readTsconfig = (filePath) => { + const parsed = typescript.readConfigFile(filePath, (path) => fs.readFileSync(path, 'utf8')) + if (parsed.error) { + const message = typeof parsed.error.messageText === 'string' + ? 
parsed.error.messageText
+    : JSON.stringify(parsed.error.messageText)
+    throw new Error('Failed to parse ' + filePath + ': ' + message)
+  }
+  return parsed.config
+}
+
+const resolveReferenceTsconfig = (referencePath) => {
+  const resolvedPath = path.resolve(baseDir, referencePath)
+  return resolvedPath.endsWith('.json') ? resolvedPath : path.join(resolvedPath, 'tsconfig.json') // same rule as tsc: only a .json suffix names a config file, so dotted directory names still resolve as directories
+}
+
+const rootConfig = readTsconfig(sourceTsconfig)
+const baseDir = path.dirname(sourceTsconfig)
+
+rootConfig.references = (rootConfig.references ?? []).filter((reference) => {
+  const refTsconfig = resolveReferenceTsconfig(reference.path)
+  if (!fs.existsSync(refTsconfig)) {
+    return true
+  }
+
+  const refConfig = readTsconfig(refTsconfig)
+  return refConfig.compilerOptions?.noEmit !== true
+})
+
+fs.writeFileSync(targetTsconfig, JSON.stringify(rootConfig))
+NODE
+    }
+  '';
 
   # Script that runs tsc with --extendedDiagnostics --verbose,
   # parses per-project timing, and emits OTEL child spans.
@@ -183,11 +243,28 @@ let
     };
     "ts:emit" = trace.withStatus "ts:emit" "binary" {
       description = "Emit build outputs without full type checking (tsc --build --noCheck)";
-      exec = tscWithDiagnostics "--build ${tsconfigFile}" "--noCheck";
+      exec = ''
+        set -euo pipefail
+        ${emitTsconfigHelper}
+        # Create the filtered config next to the source tsconfig so referenced
+        # project paths stay relative to the workspace instead of `/tmp`.
+        _emit_tmpdir="$(dirname "${tsconfigFile}")"
+        _emit_tsconfig="$(mktemp "$_emit_tmpdir/.ts-emit-XXXXXX.json")"
+        trap 'rm -f "$_emit_tsconfig"' EXIT
+        generate_emit_tsconfig "${tsconfigFile}" "$_emit_tsconfig"
+        ${tscWithDiagnostics "--build \"$_emit_tsconfig\"" "--noCheck"}
+      '';
       status = ''
         set -euo pipefail
+        ${emitTsconfigHelper}
 
-        _out="$(${tscBin} --build ${tsconfigFile} --dry --noCheck --verbose --pretty false 2>&1)" || exit 1
+        # Reuse the same filtered graph for the dry-run status check so warm
+        # caching answers the same question as the real emit command.
+ _emit_tmpdir="$(dirname "${tsconfigFile}")" + _emit_tsconfig="$(mktemp "$_emit_tmpdir/.ts-emit-XXXXXX.json")" + trap 'rm -f "$_emit_tsconfig"' EXIT + generate_emit_tsconfig "${tsconfigFile}" "$_emit_tsconfig" + _out="$(${tscBin} --build "$_emit_tsconfig" --dry --noCheck --verbose --pretty false 2>&1)" || exit 1 # tsc --build --dry reports pending work as: # - "A non-dry build would build project ..." # - "A non-dry build would update timestamps for output of project ..." diff --git a/packages/@overeng/genie/nix/build.nix b/packages/@overeng/genie/nix/build.nix index 6060173ed..35136fd18 100644 --- a/packages/@overeng/genie/nix/build.nix +++ b/packages/@overeng/genie/nix/build.nix @@ -24,7 +24,7 @@ let # Managed by `dt nix:hash:genie` — do not edit manually. depsBuilds = { "." = { - hash = "sha256-DYt0k6BQrWvznTL1cGnb66Nynaq8Jlr35x95xCx7PAo="; + hash = "sha256-5nJ7lBCuq5Vz6AqUki6MIJM3WF32UW9v5yCDv6LCyFU="; }; }; inherit gitRev commitTs dirty; diff --git a/packages/@overeng/megarepo/nix/build.nix b/packages/@overeng/megarepo/nix/build.nix index 978a22d0b..3e144730d 100644 --- a/packages/@overeng/megarepo/nix/build.nix +++ b/packages/@overeng/megarepo/nix/build.nix @@ -23,7 +23,7 @@ let # Managed by `dt nix:hash:megarepo` — do not edit manually. depsBuilds = { "." = { - hash = "sha256-lkMbv1HbXuQHbVa5C8D/Nd/hOc95j8k6PkfartCgxdo="; + hash = "sha256-5h/sfU8jgeNdmKxJA7nK+jGjFqMsCXX6R65DxxiB7I0="; }; }; smokeTestArgs = [ "--help" ];