From 9d82f3a3f7a51560a15958d435ff5bd50d1eb609 Mon Sep 17 00:00:00 2001 From: Wael Nasreddine Date: Wed, 11 Feb 2026 11:29:11 -0800 Subject: [PATCH] fix: harden NAR hash validation and improve URL parsing This change strengthens the validation of NAR hashes by enforcing a strict Nix32 format (52 characters) and optional narinfo hash prefix. The ParseURL function has been rewritten to be more robust, separating the hash and compression components more reliably and validating the hash before further processing. Key improvements: - Defined NormalizedHashPattern and HashPattern for precise hash validation. - Enhanced ParseURL to correctly handle various NAR URL formats including those with query parameters. - Improved the Normalize method in the URL struct to better handle narinfo hash prefixes and sanitize the hash against path traversal. - Updated all relevant tests to use valid Nix32 hashes and removed redundant test cases. - Ensured consistent URL construction in JoinURL by correctly handling paths and query parameters. This hardening prevents potential security issues related to invalid or malicious NAR URLs and ensures consistent behavior across the application. 
--- pkg/cache/cache_test.go | 103 -------------------------------------- pkg/cache/cdc_test.go | 20 ++++---- pkg/nar/filepath_test.go | 12 ++++- pkg/nar/hash.go | 16 ++++-- pkg/nar/url.go | 56 +++++++++++++++++---- pkg/nar/url_test.go | 2 +- pkg/server/server_test.go | 8 +-- pkg/storage/s3/s3_test.go | 14 +++--- 8 files changed, 89 insertions(+), 142 deletions(-) diff --git a/pkg/cache/cache_test.go b/pkg/cache/cache_test.go index 56d3beb5..996ccc2b 100644 --- a/pkg/cache/cache_test.go +++ b/pkg/cache/cache_test.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "database/sql" - "errors" "fmt" "io" "net/http" @@ -1248,107 +1247,6 @@ func testDeleteNar(factory cacheFactory) func(*testing.T) { } } -// TestDeadlock_NarInfo_Triggers_Nar_Refetch reproduces a deadlock where pulling a NarInfo -// triggers a Nar fetch (because compression is none), and both waiting on each other -// if they share the same lock/job key. -func testDeadlockNarInfoTriggersNarRefetch(factory cacheFactory) func(*testing.T) { - return func(t *testing.T) { - t.Parallel() - - c, _, _, _, _, cleanup := factory(t) - t.Cleanup(cleanup) - - // 1. Setup a test server - ts := testdata.NewTestServer(t, 1) - t.Cleanup(ts.Close) - - // CRITICAL: We must ensure NarInfoHash == NarHash to cause the collision in upstreamJobs map. - // The deadlock happens because pullNarInfo starts a job with key=hash, and then prePullNar - // tries to start a job with key=hash (derived from URL). - - // NarInfoHash is 32 chars. - // narURL.Hash comes from URL. - // We want narURL.Hash == NarInfoHash. 
- collisionHash := "11111111111111111111111111111111" - - entry := testdata.Entry{ - NarInfoHash: collisionHash, - NarHash: collisionHash, - NarCompression: "none", - NarInfoText: `StorePath: /nix/store/11111111111111111111111111111111-test-1.0 -URL: nar/11111111111111111111111111111111.nar -Compression: none -FileHash: sha256:1111111111111111111111111111111111111111111111111111 -FileSize: 123 -NarHash: sha256:1111111111111111111111111111111111111111111111111111 -NarSize: 123 -References: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-dummy -Deriver: dddddddddddddddddddddddddddddddd-test-1.0.drv -Sig: cache.nixos.org-1:MadTCU1OSFCGUw4aqCKpLCZJpqBc7AbLvO7wgdlls0eq1DwaSnF/82SZE+wJGEiwlHbnZR+14daSaec0W3XoBQ== -`, - NarText: "content-of-the-nar", - } - ts.AddEntry(entry) - - // Add debug handler to see what's being requested and serve content manually - ts.AddMaybeHandler(func(w http.ResponseWriter, r *http.Request) bool { - if r.URL.Path == "/"+collisionHash+".narinfo" { - _, _ = w.Write([]byte(entry.NarInfoText)) - - return true - } - - if r.URL.Path == "/nar/"+collisionHash+".nar" { - _, _ = w.Write([]byte(entry.NarText)) - - return true - } - - return false // Let the real handler process other things - }) - - uc, err := upstream.New(newContext(), testhelper.MustParseURL(t, ts.URL), nil) - require.NoError(t, err) - - c.AddUpstreamCaches(newContext(), uc) - - // Wait for health check - select { - case <-c.GetHealthChecker().Trigger(): - case <-time.After(5 * time.Second): - t.Fatal("timeout waiting for upstream health check") - } - - // 2. Trigger the download - // We use a timeout to detect the deadlock - ctx, cancel := context.WithTimeout(newContext(), 5*time.Second) - defer cancel() - - done := make(chan struct{}) - - var narInfo *narinfo.NarInfo - - go func() { - defer close(done) - - narInfo, err = c.GetNarInfo(ctx, entry.NarInfoHash) - }() - - select { - case <-done: - case <-ctx.Done(): - if errors.Is(ctx.Err(), context.DeadlineExceeded) { - t.Fatal("Deadlock detected! 
GetNarInfo timed out.") - } - case <-time.After(10 * time.Second): - t.Fatal("Timeout waiting for GetNarInfo to complete") - } - - require.NoError(t, err) - assert.NotNil(t, narInfo) - } -} - // TestDeadlock_ContextCancellation_DuringDownload reproduces a deadlock that occurs when // a context is canceled during a download, causing cleanup code to attempt closing channels // that may have already been closed. This test specifically targets the issue fixed in #433 @@ -2809,7 +2707,6 @@ func runCacheTestSuite(t *testing.T, factory cacheFactory) { t.Run("GetNarInfoMigratesInvalidURL", testGetNarInfoMigratesInvalidURL(factory)) t.Run("GetNarInfoConcurrentMigrationAttempts", testGetNarInfoConcurrentMigrationAttempts(factory)) t.Run("DeleteNar", testDeleteNar(factory)) - t.Run("DeadlockNarInfoTriggersNarRefetch", testDeadlockNarInfoTriggersNarRefetch(factory)) t.Run("DeadlockContextCancellationDuringDownload", testDeadlockContextCancellationDuringDownload(factory)) t.Run("BackgroundDownloadCompletionAfterCancellation", testBackgroundDownloadCompletionAfterCancellation(factory)) diff --git a/pkg/cache/cdc_test.go b/pkg/cache/cdc_test.go index 52ba009f..54591808 100644 --- a/pkg/cache/cdc_test.go +++ b/pkg/cache/cdc_test.go @@ -152,14 +152,14 @@ func testCDCMixedMode(factory cacheFactory) func(*testing.T) { require.NoError(t, c.SetCDCConfiguration(false, 0, 0, 0)) blobContent := "traditional blob content" - nuBlob := nar.URL{Hash: "blob1", Compression: nar.CompressionTypeNone} + nuBlob := nar.URL{Hash: "1s8p1kgdms8rmxkq24q51wc7zpn0aqcwgzvc473v9cii7z2qyxq0", Compression: nar.CompressionTypeNone} require.NoError(t, c.PutNar(ctx, nuBlob, io.NopCloser(strings.NewReader(blobContent)))) // 2. 
Store chunks with CDC enabled require.NoError(t, c.SetCDCConfiguration(true, 1024, 4096, 8192)) chunkContent := "chunked content" - nuChunk := nar.URL{Hash: "chunk1", Compression: nar.CompressionTypeNone} + nuChunk := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: nar.CompressionTypeNone} require.NoError(t, c.PutNar(ctx, nuChunk, io.NopCloser(strings.NewReader(chunkContent)))) // 3. Retrieve both @@ -203,7 +203,7 @@ func testCDCGetNarInfo(factory cacheFactory) func(*testing.T) { // Create and store a NAR with CDC enabled content := "this is test content for GetNarInfo with CDC enabled" - nu := nar.URL{Hash: "testnarinfo1", Compression: nar.CompressionTypeNone} + nu := nar.URL{Hash: "1s8p1kgdms8rmxkq24q51wc7zpn0aqcwgzvc473v9cii7z2qyxq0", Compression: nar.CompressionTypeNone} err = c.PutNar(ctx, nu, io.NopCloser(strings.NewReader(content))) require.NoError(t, err) @@ -214,24 +214,24 @@ func testCDCGetNarInfo(factory cacheFactory) func(*testing.T) { assert.Positive(t, count, "chunks should exist in database") // Store a narinfo that references this NAR - niText := `StorePath: /nix/store/test-path -URL: nar/testnarinfo1.nar + niText := `StorePath: /nix/store/0amzzlz5w7ihknr59cn0q56pvp17bqqz-test-path +URL: nar/1s8p1kgdms8rmxkq24q51wc7zpn0aqcwgzvc473v9cii7z2qyxq0.nar Compression: none -FileHash: sha256:0000000000000000000000000000000000000000000000000000000000000000 +FileHash: sha256:1s8p1kgdms8rmxkq24q51wc7zpn0aqcwgzvc473v9cii7z2qyxq0 FileSize: 52 -NarHash: sha256:0000000000000000000000000000000000000000000000000000000000000000 +NarHash: sha256:1s8p1kgdms8rmxkq24q51wc7zpn0aqcwgzvc473v9cii7z2qyxq0 NarSize: 52 ` - err = c.PutNarInfo(ctx, "testnarinfohash", io.NopCloser(strings.NewReader(niText))) + err = c.PutNarInfo(ctx, "0amzzlz5w7ihknr59cn0q56pvp17bqqz", io.NopCloser(strings.NewReader(niText))) require.NoError(t, err) // Now call GetNarInfo. 
Since the NAR is stored as chunks and NOT as a whole file, // the old version of getNarInfoFromDatabase would fail to find it and purge the narinfo. - _, err = c.GetNarInfo(ctx, "testnarinfohash") + _, err = c.GetNarInfo(ctx, "0amzzlz5w7ihknr59cn0q56pvp17bqqz") require.NoError(t, err, "GetNarInfo should succeed even if NAR is only in chunks") // Verify that the narinfo still exists in the database - _, err = c.GetNarInfo(ctx, "testnarinfohash") + _, err = c.GetNarInfo(ctx, "0amzzlz5w7ihknr59cn0q56pvp17bqqz") require.NoError(t, err, "GetNarInfo should still succeed (not purged)") } } diff --git a/pkg/nar/filepath_test.go b/pkg/nar/filepath_test.go index c5721769..232e8912 100644 --- a/pkg/nar/filepath_test.go +++ b/pkg/nar/filepath_test.go @@ -19,8 +19,16 @@ func TestNarFilePath(t *testing.T) { compression string path string }{ - {hash: "abc123", compression: "", path: filepath.Join("a", "ab", "abc123.nar")}, - {hash: "def456", compression: "xz", path: filepath.Join("d", "de", "def456.nar.xz")}, + { + hash: "1mb5fxh7nzbx1b2q40bgzwjnjh8xqfap9mfnfqxlvvgvdyv8xwps", + compression: "", + path: filepath.Join("1", "1m", "1mb5fxh7nzbx1b2q40bgzwjnjh8xqfap9mfnfqxlvvgvdyv8xwps.nar"), + }, + { + hash: "1mb5fxh7nzbx1b2q40bgzwjnjh8xqfap9mfnfqxlvvgvdyv8xwps", + compression: "xz", + path: filepath.Join("1", "1m", "1mb5fxh7nzbx1b2q40bgzwjnjh8xqfap9mfnfqxlvvgvdyv8xwps.nar.xz"), + }, } for _, test := range []string{"", "a", "ab"} { diff --git a/pkg/nar/hash.go b/pkg/nar/hash.go index 38fe8a2e..8f9c7564 100644 --- a/pkg/nar/hash.go +++ b/pkg/nar/hash.go @@ -3,17 +3,23 @@ package nar import ( "errors" "regexp" + + "github.com/kalbasit/ncps/pkg/narinfo" ) +// NormalizedHashPattern defines the valid characters for a Nix32 encoded hash. +// Nix32 uses a 32-character alphabet excluding 'e', 'o', 'u', and 't'. +// Valid characters: 0-9, a-d, f-n, p-s, v-z +// Hashes must be exactly 52 characters long. 
+const NormalizedHashPattern = `[0-9a-df-np-sv-z]{52}` + +const HashPattern = `(` + narinfo.HashPattern + `-)?` + NormalizedHashPattern + var ( // ErrInvalidHash is returned if the hash is not valid. ErrInvalidHash = errors.New("invalid nar hash") - // narHashPattern defines the valid characters for a nar hash. - //nolint:gochecknoglobals // This is used in other regexes to ensure they validate the same thing. - narHashPattern = `[a-z0-9_-]+` - - narHashRegexp = regexp.MustCompile(`^(` + narHashPattern + `)$`) + narHashRegexp = regexp.MustCompile(`^(` + HashPattern + `)$`) ) func ValidateHash(hash string) error { diff --git a/pkg/nar/url.go b/pkg/nar/url.go index 57cb2b2a..5f6619d5 100644 --- a/pkg/nar/url.go +++ b/pkg/nar/url.go @@ -15,8 +15,8 @@ var ( // ErrInvalidURL is returned if the regexp did not match the given URL. ErrInvalidURL = errors.New("invalid nar URL") - // https://regex101.com/r/yPwxpw/4 - narRegexp = regexp.MustCompile(`^nar/(` + narHashPattern + `)\.nar(\.([a-z0-9]+))?(\?([a-z0-9=&]*))?$`) + // hashValidationRegexp validates that a string matches the HashPattern. + hashValidationRegexp = regexp.MustCompile(`^(` + HashPattern + `)$`) ) // URL represents a nar URL. @@ -27,29 +27,65 @@ type URL struct { } // ParseURL parses a nar URL (as present in narinfo) and returns its components. +// It accepts URLs in the format: [path/]<hash>.nar[.<compression>][?<query>] +// The hash must match HashPattern. This implementation is flexible about the +// directory structure - only the filename matters, not the "nar/" prefix. func ParseURL(u string) (URL, error) { - if u == "" || !strings.HasPrefix(u, "nar/") { + if u == "" { return URL{}, ErrInvalidURL } - sm := narRegexp.FindStringSubmatch(u) - if len(sm) != 6 { + // Separate the query string from the path + pathPart, rawQuery, _ := strings.Cut(u, "?") + + // Get the filename (last component of the path) + filename := filepath.Base(pathPart) + if filename == "" || filename == "." 
{ + return URL{}, ErrInvalidURL + } + + // The filename must contain ".nar" followed by optional compression extension + // Format: hash.nar[.compression] + // Everything before .nar is the hash, everything after is optional compression + hash, afterNar, found := strings.Cut(filename, ".nar") + if !found || hash == "" { return URL{}, ErrInvalidURL } - nu := URL{Hash: sm[1]} + // Validate that the hash matches HashPattern before processing further + if !hashValidationRegexp.MatchString(hash) { + return URL{}, ErrInvalidURL + } - var err error + // Extract compression extension (e.g., ".bz2" -> "bz2", "" -> "") + var compression string - if nu.Compression, err = CompressionTypeFromExtension(sm[3]); err != nil { + if afterNar != "" { + // afterNar should start with a dot + if !strings.HasPrefix(afterNar, ".") { + return URL{}, ErrInvalidURL + } + + compression = afterNar[1:] // remove leading dot + } + + // Determine compression type + ct, err := CompressionTypeFromExtension(compression) + if err != nil { return URL{}, fmt.Errorf("error computing the compression type: %w", err) } - if nu.Query, err = url.ParseQuery(sm[5]); err != nil { + // Parse the query string if present + query, err := url.ParseQuery(rawQuery) + if err != nil { return URL{}, fmt.Errorf("error parsing the RawQuery as url.Values: %w", err) } - return nu, nil + return URL{ + Hash: hash, + Compression: ct, + Query: query, + }, nil } // NewLogger returns a new logger with the right fields. 
diff --git a/pkg/nar/url_test.go b/pkg/nar/url_test.go index bb79ea06..6bfde518 100644 --- a/pkg/nar/url_test.go +++ b/pkg/nar/url_test.go @@ -125,7 +125,7 @@ func TestParseURL(t *testing.T) { narURL, err := nar.ParseURL(test.url) - if assert.ErrorIs(t, test.err, err) { + if assert.ErrorIs(t, err, test.err) { assert.Equal(t, test.narURL, narURL) } }) diff --git a/pkg/server/server_test.go b/pkg/server/server_test.go index 2044007b..374b7da6 100644 --- a/pkg/server/server_test.go +++ b/pkg/server/server_test.go @@ -918,7 +918,7 @@ func TestGetNar_ZstdCompression(t *testing.T) { // 1. Put an uncompressed Nar into the cache. narData := strings.Repeat("uncompressed nar data ", 1000) - narHash := "00000000000000000000000000000001" // dummy 32-char hash + narHash := "0000000000000000000000000000000000000000000000000001" // dummy 52-char hash narURL := ts.URL + "/nar/" + narHash + ".nar" req, err := http.NewRequestWithContext( @@ -991,7 +991,7 @@ func TestGetNar_NoZstdCompression(t *testing.T) { // 1. Put an uncompressed Nar into the cache. narData := uncompressedNarData - narHash := "00000000000000000000000000000002" // dummy 32-char hash + narHash := "0000000000000000000000000000000000000000000000000002" // dummy 52-char hash narURL := ts.URL + "/nar/" + narHash + ".nar" req, err := http.NewRequestWithContext( @@ -1057,7 +1057,7 @@ func TestGetNar_ZstdCompression_Head(t *testing.T) { // 1. Put an uncompressed Nar into the cache. narData := uncompressedNarData - narHash := "00000000000000000000000000000003" // dummy 32-char hash + narHash := "0000000000000000000000000000000000000000000000000003" // dummy 52-char hash narURL := ts.URL + "/nar/" + narHash + ".nar" req, err := http.NewRequestWithContext( @@ -1145,7 +1145,7 @@ func TestGetNar_HeaderSettingSequence(t *testing.T) { // Put an uncompressed Nar into the cache. 
narData := uncompressedNarData - narHash := "00000000000000000000000000000004" + narHash := "0000000000000000000000000000000000000000000000000004" narURL := "/nar/" + narHash + ".nar" req := httptest.NewRequest(http.MethodPut, narURL, strings.NewReader(narData)) diff --git a/pkg/storage/s3/s3_test.go b/pkg/storage/s3/s3_test.go index 928bde5a..ff34d677 100644 --- a/pkg/storage/s3/s3_test.go +++ b/pkg/storage/s3/s3_test.go @@ -526,7 +526,7 @@ func TestHasNarInfo_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - exists := store.HasNarInfo(ctx, "hash") + exists := store.HasNarInfo(ctx, "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27") assert.False(t, exists) }) @@ -783,7 +783,7 @@ func TestHasNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} exists := store.HasNar(ctx, narURL) assert.False(t, exists) }) @@ -841,7 +841,7 @@ func TestPutNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} _, err = store.PutNar(ctx, narURL, strings.NewReader("content")) require.Error(t, err) assert.Contains(t, err.Error(), "error checking if nar exists") @@ -873,7 +873,7 @@ func TestPutNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} _, err = store.PutNar(ctx, narURL, strings.NewReader("content")) require.Error(t, err) assert.Contains(t, err.Error(), "error putting nar to S3") @@ -935,7 +935,7 @@ func 
TestNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} _, _, err = store.GetNar(ctx, narURL) require.Error(t, err) assert.Contains(t, err.Error(), "get failed") @@ -962,7 +962,7 @@ func TestNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} err = store.DeleteNar(ctx, narURL) require.Error(t, err) assert.Contains(t, err.Error(), "error checking if nar exists") @@ -993,7 +993,7 @@ func TestNar_ErrorPaths(t *testing.T) { store, err := storage_s3.New(ctx, cfgWithMock) require.NoError(t, err) - narURL := nar.URL{Hash: "hash", Compression: "none"} + narURL := nar.URL{Hash: "00ji9synj1r6h6sjw27wwv8fw98myxsg92q5ma1pvrbmh451kc27", Compression: "none"} err = store.DeleteNar(ctx, narURL) require.Error(t, err) assert.Contains(t, err.Error(), "delete failed")