diff --git a/internal/snapshot/snapshot_test.go b/internal/snapshot/snapshot_test.go index 9a34756..de35831 100644 --- a/internal/snapshot/snapshot_test.go +++ b/internal/snapshot/snapshot_test.go @@ -88,6 +88,55 @@ func TestCreateWithExcludePatterns(t *testing.T) { assert.IsError(t, err, os.ErrNotExist) } +func TestCreateExcludesOnlyGitLockFiles(t *testing.T) { + ctx := logging.ContextWithLogger(context.Background(), slog.Default()) + mem, err := cache.NewMemory(ctx, cache.MemoryConfig{LimitMB: 100, MaxTTL: time.Hour}) + assert.NoError(t, err) + defer mem.Close() + key := cache.Key{1, 2, 3} + + srcDir := t.TempDir() + // Tracked lock files that should be included. + assert.NoError(t, os.WriteFile(filepath.Join(srcDir, "package-lock.json"), []byte("npm"), 0o644)) + assert.NoError(t, os.WriteFile(filepath.Join(srcDir, "yarn.lock"), []byte("yarn"), 0o644)) + assert.NoError(t, os.MkdirAll(filepath.Join(srcDir, "subdir"), 0o755)) + assert.NoError(t, os.WriteFile(filepath.Join(srcDir, "subdir", "Gemfile.lock"), []byte("ruby"), 0o644)) + + // Git internal lock files that should be excluded. + assert.NoError(t, os.MkdirAll(filepath.Join(srcDir, ".git"), 0o755)) + assert.NoError(t, os.WriteFile(filepath.Join(srcDir, ".git", "index.lock"), []byte("git"), 0o644)) + assert.NoError(t, os.WriteFile(filepath.Join(srcDir, ".git", "HEAD"), []byte("ref: refs/heads/main"), 0o644)) + + err = snapshot.Create(ctx, mem, key, srcDir, time.Hour, []string{"./.git/*.lock"}, 0) + assert.NoError(t, err) + + dstDir := t.TempDir() + err = snapshot.Restore(ctx, mem, key, dstDir, 0) + assert.NoError(t, err) + + // Tracked lock files must be present. + content, err := os.ReadFile(filepath.Join(dstDir, "package-lock.json")) + assert.NoError(t, err) + assert.Equal(t, "npm", string(content)) + + content, err = os.ReadFile(filepath.Join(dstDir, "yarn.lock")) + assert.NoError(t, err) + assert.Equal(t, "yarn", string(content)) + + content, err = os.ReadFile(filepath.Join(dstDir, "subdir", "Gemfile.lock")) + assert.NoError(t, err) + assert.Equal(t, "ruby", string(content)) + + // Git internal lock file must be excluded. + _, err = os.Stat(filepath.Join(dstDir, ".git", "index.lock")) + assert.IsError(t, err, os.ErrNotExist) + + // Other .git files should still be present. + content, err = os.ReadFile(filepath.Join(dstDir, ".git", "HEAD")) + assert.NoError(t, err) + assert.Equal(t, "ref: refs/heads/main", string(content)) +} + func TestCreatePreservesSymlinks(t *testing.T) { ctx := logging.ContextWithLogger(context.Background(), slog.Default()) mem, err := cache.NewMemory(ctx, cache.MemoryConfig{LimitMB: 100, MaxTTL: time.Hour}) diff --git a/internal/strategy/git/snapshot.go b/internal/strategy/git/snapshot.go index 4690ac0..c6002b0 100644 --- a/internal/strategy/git/snapshot.go +++ b/internal/strategy/git/snapshot.go @@ -102,7 +102,7 @@ func (s *Strategy) generateAndUploadSnapshot(ctx context.Context, repo *gitclone } cacheKey := snapshotCacheKey(upstream) - excludePatterns := []string{"*.lock"} + excludePatterns := []string{"./.git/*.lock"} err = snapshot.Create(ctx, s.cache, cacheKey, snapshotDir, 0, excludePatterns, s.config.ZstdThreads) @@ -303,7 +303,7 @@ func (s *Strategy) streamSnapshotDirect(w http.ResponseWriter, r *http.Request, w.Header().Set("Content-Type", "application/zstd") w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", filepath.Base(repoDir)+".tar.zst")) - excludePatterns := []string{"*.lock"} + excludePatterns := []string{"./.git/*.lock"} if err := snapshot.StreamTo(ctx, w, repoDir, excludePatterns, s.config.ZstdThreads); err != nil { logger.ErrorContext(ctx, "Failed to stream snapshot to client", "upstream", upstreamURL, "error", err) } @@ -369,7 +369,7 @@ func (s *Strategy) writeSnapshotSpool(w http.ResponseWriter, r *http.Request, re w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=%q", filepath.Base(repoDir)+".tar.zst")) tw := NewSpoolTeeWriter(w, spool) - excludePatterns := []string{"*.lock"} + excludePatterns := []string{"./.git/*.lock"} if err := snapshot.StreamTo(ctx, tw, repoDir, excludePatterns, s.config.ZstdThreads); err != nil { logger.ErrorContext(ctx, "Failed to stream snapshot to client", "upstream", upstreamURL, "error", err) spool.MarkError(err) diff --git a/internal/strategy/git/snapshot_test.go b/internal/strategy/git/snapshot_test.go index ba359a3..405ba64 100644 --- a/internal/strategy/git/snapshot_test.go +++ b/internal/strategy/git/snapshot_test.go @@ -133,6 +133,13 @@ func TestSnapshotOnDemandGenerationViaHTTP(t *testing.T) { // createTestMirrorRepo creates a bare mirror-style repo at mirrorPath with one commit. func createTestMirrorRepo(t *testing.T, mirrorPath string) { + t.Helper() + createTestMirrorRepoWithFiles(t, mirrorPath, map[string]string{ + "hello.txt": "hello\n", + }) +} + +func createTestMirrorRepoWithFiles(t *testing.T, mirrorPath string, files map[string]string) { t.Helper() tmpWork := t.TempDir() @@ -146,7 +153,11 @@ func createTestMirrorRepo(t *testing.T, mirrorPath string) { assert.NoError(t, err, string(output)) } - assert.NoError(t, os.WriteFile(filepath.Join(tmpWork, "hello.txt"), []byte("hello\n"), 0o644)) + for name, content := range files { + path := filepath.Join(tmpWork, name) + assert.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755)) + assert.NoError(t, os.WriteFile(path, []byte(content), 0o644)) + } for _, args := range [][]string{ {"-C", tmpWork, "add", "."}, @@ -224,6 +235,54 @@ func TestSnapshotGenerationViaLocalClone(t *testing.T) { assert.True(t, os.IsNotExist(err)) } +func TestSnapshotGenerationIncludesTrackedLockFiles(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not found in PATH") + } + + _, ctx := logging.Configure(context.Background(), logging.Config{}) + tmpDir := t.TempDir() + mirrorRoot := filepath.Join(tmpDir, "mirrors") + upstreamURL := "https://github.com/org/repo" + + mirrorPath := filepath.Join(mirrorRoot, "github.com", "org", "repo") + createTestMirrorRepoWithFiles(t, mirrorPath, map[string]string{ + "hello.txt": "hello\n", + "package-lock.json": "{\n \"name\": \"repo\"\n}\n", + "subdir/Gemfile.lock": "GEM\n", + }) + + memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{MaxTTL: time.Hour}) + assert.NoError(t, err) + mux := newTestMux() + + cm := gitclone.NewManagerProvider(ctx, gitclone.Config{MirrorRoot: mirrorRoot}, nil) + s, err := git.New(ctx, git.Config{}, newTestScheduler(ctx, t), memCache, mux, cm, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil + assert.NoError(t, err) + + manager, err := cm() + assert.NoError(t, err) + repo, err := manager.GetOrCreate(ctx, upstreamURL) + assert.NoError(t, err) + assert.Equal(t, gitclone.StateReady, repo.State()) + + err = s.GenerateAndUploadSnapshot(ctx, repo) + assert.NoError(t, err) + + cacheKey := cache.NewKey(upstreamURL + ".snapshot") + restoreDir := filepath.Join(tmpDir, "restored") + err = snapshot.Restore(ctx, memCache, cacheKey, restoreDir, 0) + assert.NoError(t, err) + + data, err := os.ReadFile(filepath.Join(restoreDir, "package-lock.json")) + assert.NoError(t, err) + assert.Equal(t, "{\n \"name\": \"repo\"\n}\n", string(data)) + + data, err = os.ReadFile(filepath.Join(restoreDir, "subdir", "Gemfile.lock")) + assert.NoError(t, err) + assert.Equal(t, "GEM\n", string(data)) +} + func TestMirrorSnapshotRestoreDirectly(t *testing.T) { if _, err := exec.LookPath("git"); err != nil { t.Skip("git not found in PATH")