diff --git a/internal/gitclone/manager.go b/internal/gitclone/manager.go
index 543648b..192163e 100644
--- a/internal/gitclone/manager.go
+++ b/internal/gitclone/manager.go
@@ -555,6 +555,32 @@ func (r *Repository) GetUpstreamRefs(ctx context.Context) (map[string]string, er
 	return ParseGitRefs(output), nil
 }
 
+// Repack performs a full `git repack` of the repository at r.path: all
+// reachable objects are rewritten into a single pack (-a), packs made
+// redundant by it are deleted (-d), and a multi-pack-index and reachability
+// bitmap are written.
+//
+// The repository read lock is held while the command runs. If git exits with
+// an error, the returned error wraps it together with git's combined output.
+func (r *Repository) Repack(ctx context.Context) error {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	logger := logging.FromContext(ctx)
+	logger.InfoContext(ctx, "Full repack started", "upstream", r.upstreamURL)
+
+	// -b is omitted: it is the short form of --write-bitmap-index.
+	// #nosec G204 - r.path is controlled by us
+	cmd := exec.CommandContext(ctx, "git", "-C", r.path, "repack", "-ad", "--write-midx", "--write-bitmap-index")
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return errors.Wrapf(err, "git repack: %s", string(output))
+	}
+
+	logger.InfoContext(ctx, "Full repack completed", "upstream", r.upstreamURL)
+	return nil
+}
+
 func (r *Repository) HasCommit(ctx context.Context, ref string) bool {
 	r.mu.RLock()
 	defer r.mu.RUnlock()
diff --git a/internal/gitclone/manager_test.go b/internal/gitclone/manager_test.go
index 1444da4..2578e99 100644
--- a/internal/gitclone/manager_test.go
+++ b/internal/gitclone/manager_test.go
@@ -324,6 +324,35 @@ func TestRepository_CloneSetsMirrorConfig(t *testing.T) {
 	}
 }
 
+func TestRepository_Repack(t *testing.T) {
+	_, ctx := logging.Configure(t.Context(), logging.Config{Level: slog.LevelError})
+	tmpDir := t.TempDir()
+	upstreamPath := createBareRepo(t, tmpDir)
+
+	clonePath := filepath.Join(tmpDir, "mirror")
+	cmd := exec.Command("git", "clone", "--mirror", upstreamPath, clonePath)
+	assert.NoError(t, cmd.Run())
+
+	repo := &Repository{
+		state:       StateReady,
+		path:        clonePath,
+		upstreamURL: upstreamPath,
+		fetchSem:    make(chan struct{}, 1),
+	}
+	repo.fetchSem <- struct{}{}
+
+	assert.NoError(t, repo.Repack(ctx))
+
+	// Verify a pack file exists after repack.
+	packs, err := filepath.Glob(filepath.Join(clonePath, "objects", "pack", "*.pack"))
+	assert.NoError(t, err)
+	assert.True(t, len(packs) > 0, "expected at least one pack file after repack")
+
+	// Verify multi-pack-index was written.
+	_, err = os.Stat(filepath.Join(clonePath, "objects", "pack", "multi-pack-index"))
+	assert.NoError(t, err)
+}
+
 func TestRepository_HasCommit(t *testing.T) {
 	ctx := context.Background()
 	tmpDir := t.TempDir()
diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go
index 5f2dc32..c0dd10b 100644
--- a/internal/strategy/git/git.go
+++ b/internal/strategy/git/git.go
@@ -35,6 +35,7 @@ func Register(r *strategy.Registry, scheduler jobscheduler.Scheduler, cloneManag
 
 type Config struct {
 	SnapshotInterval time.Duration `hcl:"snapshot-interval,optional" help:"How often to generate tar.zstd snapshots. 0 disables snapshots." default:"0"`
+	RepackInterval   time.Duration `hcl:"repack-interval,optional" help:"How often to run full repack. 0 disables." default:"0"`
 }
 
 type Strategy struct {
@@ -100,6 +101,9 @@ func New(
 		if s.config.SnapshotInterval > 0 {
 			s.scheduleSnapshotJobs(repo)
 		}
+		if s.config.RepackInterval > 0 {
+			s.scheduleRepackJobs(repo)
+		}
 	}
 
 	s.proxy = &httputil.ReverseProxy{
@@ -409,6 +413,9 @@ func (s *Strategy) startClone(ctx context.Context, repo *gitclone.Repository) {
 	if s.config.SnapshotInterval > 0 {
 		s.scheduleSnapshotJobs(repo)
 	}
+	if s.config.RepackInterval > 0 {
+		s.scheduleRepackJobs(repo)
+	}
 }
 
 func (s *Strategy) maybeBackgroundFetch(repo *gitclone.Repository) {
diff --git a/internal/strategy/git/repack.go b/internal/strategy/git/repack.go
new file mode 100644
index 0000000..8734fd5
--- /dev/null
+++ b/internal/strategy/git/repack.go
@@ -0,0 +1,16 @@
+package git
+
+import (
+	"context"
+
+	"github.com/block/cachew/internal/gitclone"
+)
+
+// scheduleRepackJobs registers a periodic "repack-periodic" job for repo with
+// the scheduler. The job is submitted under the repository's upstream URL with
+// the configured RepackInterval, and each run calls repo.Repack.
+func (s *Strategy) scheduleRepackJobs(repo *gitclone.Repository) {
+	s.scheduler.SubmitPeriodicJob(repo.UpstreamURL(), "repack-periodic", s.config.RepackInterval, func(ctx context.Context) error {
+		return repo.Repack(ctx)
+	})
+}
diff --git a/internal/strategy/git/repack_test.go b/internal/strategy/git/repack_test.go
new file mode 100644
index 0000000..72336c2
--- /dev/null
+++ b/internal/strategy/git/repack_test.go
@@ -0,0 +1,75 @@
+package git_test
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"github.com/alecthomas/assert/v2"
+
+	"github.com/block/cachew/internal/gitclone"
+	"github.com/block/cachew/internal/githubapp"
+	"github.com/block/cachew/internal/jobscheduler"
+	"github.com/block/cachew/internal/logging"
+	"github.com/block/cachew/internal/strategy/git"
+)
+
+// TestRepackInterval checks that git.New succeeds both with repack scheduling
+// enabled and with it disabled.
+func TestRepackInterval(t *testing.T) {
+	_, ctx := logging.Configure(t.Context(), logging.Config{})
+	tmpDir := t.TempDir()
+
+	tests := []struct {
+		name           string
+		repackInterval time.Duration
+	}{
+		{
+			name:           "Enabled",
+			repackInterval: 24 * time.Hour,
+		},
+		{
+			name:           "Disabled",
+			repackInterval: 0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			mux := newTestMux()
+			cm := gitclone.NewManagerProvider(ctx, gitclone.Config{
+				MirrorRoot: filepath.Join(tmpDir, tt.name),
+			}, nil)
+			s, err := git.New(ctx, git.Config{
+				RepackInterval: tt.repackInterval,
+			}, jobscheduler.New(ctx, jobscheduler.Config{}), nil, mux, cm, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil
+			assert.NoError(t, err)
+			assert.NotZero(t, s)
+		})
+	}
+}
+
+// TestRepackScheduledForExistingRepos checks that git.New succeeds with
+// repacking enabled when a clone directory already exists under MirrorRoot.
+func TestRepackScheduledForExistingRepos(t *testing.T) {
+	_, ctx := logging.Configure(t.Context(), logging.Config{})
+	tmpDir := t.TempDir()
+
+	// Create a fake bare clone directory on disk before initializing strategy.
+	clonePath := filepath.Join(tmpDir, "github.com", "org", "repo")
+	err := os.MkdirAll(clonePath, 0o750)
+	assert.NoError(t, err)
+	err = os.WriteFile(filepath.Join(clonePath, "HEAD"), []byte("ref: refs/heads/main\n"), 0o640)
+	assert.NoError(t, err)
+
+	mux := newTestMux()
+	cm := gitclone.NewManagerProvider(ctx, gitclone.Config{
+		MirrorRoot: tmpDir,
+	}, nil)
+	s, err := git.New(ctx, git.Config{
+		RepackInterval: 24 * time.Hour,
+	}, jobscheduler.New(ctx, jobscheduler.Config{}), nil, mux, cm, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil
+	assert.NoError(t, err)
+	assert.NotZero(t, s)
+}