diff --git a/internal/strategy/git/export_test.go b/internal/strategy/git/export_test.go
new file mode 100644
index 0000000..3fb1a17
--- /dev/null
+++ b/internal/strategy/git/export_test.go
@@ -0,0 +1,13 @@
+package git
+
+import (
+	"context"
+
+	"github.com/block/cachew/internal/gitclone"
+)
+
+// GenerateAndUploadSnapshot exposes the unexported generateAndUploadSnapshot so
+// tests outside this package can drive snapshot generation directly.
+func (s *Strategy) GenerateAndUploadSnapshot(ctx context.Context, repo *gitclone.Repository) error {
+	return s.generateAndUploadSnapshot(ctx, repo)
+}
diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go
index c0dd10b..ae698c4 100644
--- a/internal/strategy/git/git.go
+++ b/internal/strategy/git/git.go
@@ -77,8 +77,10 @@ func New(
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to create clone manager")
 	}
-	if err := os.RemoveAll(filepath.Join(cloneManager.Config().MirrorRoot, ".spools")); err != nil {
-		return nil, errors.Wrap(err, "clean up stale spools")
+	for _, dir := range []string{".spools", ".snapshots"} {
+		if err := os.RemoveAll(filepath.Join(cloneManager.Config().MirrorRoot, dir)); err != nil {
+			return nil, errors.Wrapf(err, "clean up stale %s", dir)
+		}
 	}
 
 	s := &Strategy{
diff --git a/internal/strategy/git/snapshot.go b/internal/strategy/git/snapshot.go
index 9d294fc..7d5ae15 100644
--- a/internal/strategy/git/snapshot.go
+++ b/internal/strategy/git/snapshot.go
@@ -4,6 +4,11 @@ import (
 	"context"
 	"log/slog"
 	"net/http"
+	"net/url"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
 	"time"
 
 	"github.com/alecthomas/errors"
@@ -14,20 +19,85 @@ import (
 	"github.com/block/cachew/internal/snapshot"
 )
 
+// snapshotDirForURL returns the scratch directory in which a snapshot of
+// upstreamURL is staged: MIRROR_ROOT/.snapshots/HOST/PATH, with a trailing
+// ".git" removed from PATH. A URL that does not parse, or that has neither a
+// host nor a path, maps to MIRROR_ROOT/.snapshots/unknown.
+//
+// HOST/PATH is cleaned as a rooted path before joining, so ".." segments in
+// the URL cannot move the result outside .snapshots. That matters because
+// the caller passes the result to os.RemoveAll.
+func snapshotDirForURL(mirrorRoot, upstreamURL string) string {
+	snapshotsRoot := filepath.Join(mirrorRoot, ".snapshots")
+	parsed, err := url.Parse(upstreamURL)
+	if err != nil {
+		return filepath.Join(snapshotsRoot, "unknown")
+	}
+	repoPath := strings.TrimSuffix(parsed.Path, ".git")
+	rel := filepath.Clean("/" + strings.TrimLeft(parsed.Host+"/"+repoPath, "/"))
+	if rel == string(filepath.Separator) {
+		// Nothing identifies the repo; never hand back .snapshots itself.
+		return filepath.Join(snapshotsRoot, "unknown")
+	}
+	return filepath.Join(snapshotsRoot, rel)
+}
+
+// generateAndUploadSnapshot clones the mirror at repo.Path() into a scratch
+// directory under .snapshots, points the clone's origin at the upstream URL,
+// and archives it into the cache via snapshot.Create under the key
+// upstream+".snapshot" with a 7-day TTL. The scratch directory is removed on return.
 func (s *Strategy) generateAndUploadSnapshot(ctx context.Context, repo *gitclone.Repository) error {
 	logger := logging.FromContext(ctx)
 	upstream := repo.UpstreamURL()
 
 	logger.InfoContext(ctx, "Snapshot generation started", slog.String("upstream", upstream))
 
+	mirrorRoot := s.cloneManager.Config().MirrorRoot
+	snapshotDir := snapshotDirForURL(mirrorRoot, upstream)
+
+	// Clean any previous snapshot working directory.
+	if err := os.RemoveAll(snapshotDir); err != nil {
+		return errors.Wrap(err, "remove previous snapshot dir")
+	}
+	if err := os.MkdirAll(filepath.Dir(snapshotDir), 0o750); err != nil {
+		return errors.Wrap(err, "create snapshot parent dir")
+	}
+
+	// Remove the working directory on every exit path, including a failed
+	// clone, so a partial checkout is never left behind.
+	defer func() {
+		if rmErr := os.RemoveAll(snapshotDir); rmErr != nil {
+			logger.WarnContext(ctx, "Failed to clean up snapshot dir", slog.String("error", rmErr.Error()))
+		}
+	}()
+
+	// Local clone from the mirror — git hardlinks objects by default.
+	// #nosec G204 - repo.Path() and snapshotDir are controlled by us
+	cmd := exec.CommandContext(ctx, "git", "clone", repo.Path(), snapshotDir)
+	if output, err := cmd.CombinedOutput(); err != nil {
+		return errors.Wrapf(err, "git clone for snapshot: %s", string(output))
+	}
+
+	// The clone records the mirror's local path as "origin"; that path only
+	// exists on this host. Point origin at the upstream URL so the restored
+	// checkout can fetch.
+	// #nosec G204 - snapshotDir is controlled by us; upstream is passed as a single argument
+	cmd = exec.CommandContext(ctx, "git", "-C", snapshotDir, "remote", "set-url", "origin", upstream)
+	if output, err := cmd.CombinedOutput(); err != nil {
+		return errors.Wrapf(err, "set snapshot remote url: %s", string(output))
+	}
+
 	cacheKey := cache.NewKey(upstream + ".snapshot")
 	ttl := 7 * 24 * time.Hour
+	// NOTE(review): snapshotDir is a working-tree checkout, not the bare mirror.
+	// If snapshot.Create applies this pattern to every file, tracked lockfiles
+	// (Cargo.lock, yarn.lock, ...) would be left out of the archive — confirm.
 	excludePatterns := []string{"*.lock"}
 
-	err := errors.Wrap(snapshot.Create(ctx, s.cache, cacheKey, repo.Path(), ttl, excludePatterns), "create snapshot")
+	err := snapshot.Create(ctx, s.cache, cacheKey, snapshotDir, ttl, excludePatterns)
 	if err != nil {
 		logger.ErrorContext(ctx, "Snapshot generation failed", slog.String("upstream", upstream), slog.String("error", err.Error()))
-		return err
+		return errors.Wrap(err, "create snapshot")
 	}
 
 	logger.InfoContext(ctx, "Snapshot generation completed", slog.String("upstream", upstream))
diff --git a/internal/strategy/git/snapshot_test.go b/internal/strategy/git/snapshot_test.go
index 52b6085..b87e02e 100644
--- a/internal/strategy/git/snapshot_test.go
+++ b/internal/strategy/git/snapshot_test.go
@@ -4,6 +4,9 @@ import (
 	"context"
 	"net/http"
 	"net/http/httptest"
+	"os"
+	"os/exec"
+	"path/filepath"
 	"testing"
 	"time"
 
@@ -14,6 +17,7 @@ import (
 	"github.com/block/cachew/internal/githubapp"
 	"github.com/block/cachew/internal/jobscheduler"
 	"github.com/block/cachew/internal/logging"
+	"github.com/block/cachew/internal/snapshot"
 	"github.com/block/cachew/internal/strategy/git"
 )
 
@@ -75,38 +79,96 @@ func TestSnapshotHTTPEndpoint(t *testing.T) {
 	assert.Equal(t, 404, w.Code)
 }
 
-func TestSnapshotInterval(t *testing.T) {
-	_, ctx := logging.Configure(context.Background(), logging.Config{})
-	tmpDir := t.TempDir()
+// createTestMirrorRepo creates a bare mirror-style repo at mirrorPath with one commit.
+func createTestMirrorRepo(t *testing.T, mirrorPath string) {
+	t.Helper()
+	tmpWork := t.TempDir()
+
+	for _, args := range [][]string{
+		{"init", tmpWork},
+		{"-C", tmpWork, "config", "user.email", "test@test.com"},
+		{"-C", tmpWork, "config", "user.name", "Test"},
+		// Keep the commit below independent of any global signing setup.
+		{"-C", tmpWork, "config", "commit.gpgsign", "false"},
+	} {
+		cmd := exec.Command("git", args...)
+		output, err := cmd.CombinedOutput()
+		assert.NoError(t, err, string(output))
+	}
+
+	assert.NoError(t, os.WriteFile(filepath.Join(tmpWork, "hello.txt"), []byte("hello\n"), 0o644))
 
-	tests := []struct {
-		name             string
-		snapshotInterval time.Duration
-	}{
-		{
-			name:             "CustomInterval",
-			snapshotInterval: 1 * time.Hour,
-		},
-		{
-			name:             "DefaultInterval",
-			snapshotInterval: 0,
-		},
+	for _, args := range [][]string{
+		{"-C", tmpWork, "add", "."},
+		{"-C", tmpWork, "commit", "-m", "initial"},
+		{"clone", "--mirror", tmpWork, mirrorPath},
+	} {
+		cmd := exec.Command("git", args...)
+		output, err := cmd.CombinedOutput()
+		assert.NoError(t, err, string(output))
 	}
+}
 
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{})
-			assert.NoError(t, err)
-			mux := newTestMux()
-
-			cm := gitclone.NewManagerProvider(ctx, gitclone.Config{
-				MirrorRoot: tmpDir,
-			}, nil)
-			s, err := git.New(ctx, git.Config{
-				SnapshotInterval: tt.snapshotInterval,
-			}, jobscheduler.New(ctx, jobscheduler.Config{}), memCache, mux, cm, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil
-			assert.NoError(t, err)
-			assert.NotZero(t, s)
-		})
+func TestSnapshotGenerationViaLocalClone(t *testing.T) {
+	if _, err := exec.LookPath("git"); err != nil {
+		t.Skip("git not found in PATH")
 	}
+
+	_, ctx := logging.Configure(context.Background(), logging.Config{})
+	tmpDir := t.TempDir()
+	mirrorRoot := filepath.Join(tmpDir, "mirrors")
+	upstreamURL := "https://github.com/org/repo"
+
+	// Create a mirror repo at the path the clone manager would use.
+	mirrorPath := filepath.Join(mirrorRoot, "github.com", "org", "repo")
+	createTestMirrorRepo(t, mirrorPath)
+
+	memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{})
+	assert.NoError(t, err)
+	mux := newTestMux()
+
+	cm := gitclone.NewManagerProvider(ctx, gitclone.Config{MirrorRoot: mirrorRoot}, nil)
+	s, err := git.New(ctx, git.Config{}, jobscheduler.New(ctx, jobscheduler.Config{}), memCache, mux, cm, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil
+	assert.NoError(t, err)
+
+	// GetOrCreate so the strategy knows about the repo.
+	manager, err := cm()
+	assert.NoError(t, err)
+	repo, err := manager.GetOrCreate(ctx, upstreamURL)
+	assert.NoError(t, err)
+	assert.Equal(t, gitclone.StateReady, repo.State())
+
+	// Generate the snapshot.
+	err = s.GenerateAndUploadSnapshot(ctx, repo)
+	assert.NoError(t, err)
+
+	// Verify snapshot was uploaded to cache.
+	cacheKey := cache.NewKey(upstreamURL + ".snapshot")
+	_, headers, err := memCache.Open(ctx, cacheKey)
+	assert.NoError(t, err)
+	assert.Equal(t, "application/zstd", headers.Get("Content-Type"))
+
+	// Restore the snapshot and verify it is a working (non-bare) checkout.
+	restoreDir := filepath.Join(tmpDir, "restored")
+	err = snapshot.Restore(ctx, memCache, cacheKey, restoreDir)
+	assert.NoError(t, err)
+
+	// A non-bare clone has a .git directory (not a bare repo).
+	_, err = os.Stat(filepath.Join(restoreDir, ".git"))
+	assert.NoError(t, err)
+
+	// The restored checkout must name the upstream as origin, not the server-local mirror path.
+	out, err := exec.Command("git", "-C", restoreDir, "config", "--get", "remote.origin.url").Output()
+	assert.NoError(t, err)
+	assert.Equal(t, upstreamURL+"\n", string(out))
+
+	// The working tree should contain the committed file.
+	data, err := os.ReadFile(filepath.Join(restoreDir, "hello.txt"))
+	assert.NoError(t, err)
+	assert.Equal(t, "hello\n", string(data))
+
+	// Snapshot working directory should have been cleaned up.
+	snapshotWorkDir := filepath.Join(mirrorRoot, ".snapshots", "github.com", "org", "repo")
+	_, err = os.Stat(snapshotWorkDir)
+	assert.True(t, os.IsNotExist(err))
 }