From 2bf36e6f04cb9e615a60814de7abc25b4b8cba8f Mon Sep 17 00:00:00 2001 From: Alec Thomas Date: Wed, 18 Feb 2026 10:17:05 +1100 Subject: [PATCH] refactor: remove bundle snapshots These did not really help with git clone performance. --- internal/strategy/git/bundle.go | 57 ------------- internal/strategy/git/bundle_test.go | 120 --------------------------- internal/strategy/git/git.go | 26 +----- 3 files changed, 1 insertion(+), 202 deletions(-) delete mode 100644 internal/strategy/git/bundle.go delete mode 100644 internal/strategy/git/bundle_test.go diff --git a/internal/strategy/git/bundle.go b/internal/strategy/git/bundle.go deleted file mode 100644 index 115609f..0000000 --- a/internal/strategy/git/bundle.go +++ /dev/null @@ -1,57 +0,0 @@ -package git - -import ( - "bytes" - "context" - "log/slog" - "net/http" - "os/exec" - "time" - - "github.com/alecthomas/errors" - - "github.com/block/cachew/internal/cache" - "github.com/block/cachew/internal/gitclone" - "github.com/block/cachew/internal/logging" -) - -func (s *Strategy) generateAndUploadBundle(ctx context.Context, repo *gitclone.Repository) error { - logger := logging.FromContext(ctx) - upstream := repo.UpstreamURL() - - logger.InfoContext(ctx, "Bundle generation started", slog.String("upstream", upstream)) - - cacheKey := cache.NewKey(upstream + ".bundle") - - headers := http.Header{ - "Content-Type": []string{"application/x-git-bundle"}, - } - ttl := 7 * 24 * time.Hour - w, err := s.cache.Create(ctx, cacheKey, headers, ttl) - if err != nil { - return errors.Wrap(err, "create cache entry") - } - defer w.Close() - - err = errors.Wrap(repo.WithReadLock(func() error { - var stderr bytes.Buffer - // Use --branches --remotes to include all branches but exclude tags (which can be massive) - // #nosec G204 - repo.Path() is controlled by us - cmd := exec.CommandContext(ctx, "git", "-C", repo.Path(), "bundle", "create", "-", "--branches", "--remotes") - cmd.Stdout = w - cmd.Stderr = &stderr - - if err := cmd.Run(); err != nil { - return errors.Wrapf(err, "bundle generation failed: %s", stderr.String()) - } - - return nil - }), "generate bundle") - if err != nil { - logger.ErrorContext(ctx, "Bundle generation failed", slog.String("upstream", upstream), slog.String("error", err.Error())) - return err - } - - logger.InfoContext(ctx, "Bundle generation completed", slog.String("upstream", upstream)) - return nil -} diff --git a/internal/strategy/git/bundle_test.go b/internal/strategy/git/bundle_test.go deleted file mode 100644 index 915b7ff..0000000 --- a/internal/strategy/git/bundle_test.go +++ /dev/null @@ -1,120 +0,0 @@ -package git_test - -import ( - "context" - "net/http" - "net/http/httptest" - "testing" - "time" - - "github.com/alecthomas/assert/v2" - - "github.com/block/cachew/internal/cache" - "github.com/block/cachew/internal/gitclone" - "github.com/block/cachew/internal/githubapp" - "github.com/block/cachew/internal/jobscheduler" - "github.com/block/cachew/internal/logging" - "github.com/block/cachew/internal/strategy/git" -) - -func TestBundleHTTPEndpoint(t *testing.T) { - _, ctx := logging.Configure(context.Background(), logging.Config{}) - tmpDir := t.TempDir() - - cloneManager := gitclone.NewManagerProvider(ctx, gitclone.Config{ - MirrorRoot: tmpDir, - }, nil) - - memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{}) - assert.NoError(t, err) - mux := newTestMux() - - _, err = git.New(ctx, git.Config{ - BundleInterval: 24 * time.Hour, - }, jobscheduler.New(ctx, jobscheduler.Config{}), memCache, mux, cloneManager, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil - assert.NoError(t, err) - - // Create a fake bundle in the cache - upstreamURL := "https://github.com/org/repo" - cacheKey := cache.NewKey(upstreamURL + ".bundle") - bundleData := []byte("fake bundle data") - - headers := make(map[string][]string) - headers["Content-Type"] = []string{"application/x-git-bundle"} - writer, err := memCache.Create(ctx, cacheKey, headers, 24*time.Hour) - assert.NoError(t, err) - _, err = writer.Write(bundleData) - assert.NoError(t, err) - err = writer.Close() - assert.NoError(t, err) - - // Test bundle endpoint exists - handler := mux.handlers["GET /git/{host}/{path...}"] - assert.NotZero(t, handler) - - // Test successful bundle request - req := httptest.NewRequest(http.MethodGet, "/git/github.com/org/repo/bundle", nil) - req = req.WithContext(ctx) - req.SetPathValue("host", "github.com") - req.SetPathValue("path", "org/repo/bundle") - w := httptest.NewRecorder() - - handler.ServeHTTP(w, req) - - assert.Equal(t, 200, w.Code) - assert.Equal(t, "application/x-git-bundle", w.Header().Get("Content-Type")) - assert.Equal(t, bundleData, w.Body.Bytes()) - - // Test bundle not found - req = httptest.NewRequest(http.MethodGet, "/git/github.com/org/nonexistent/bundle", nil) - req = req.WithContext(ctx) - req.SetPathValue("host", "github.com") - req.SetPathValue("path", "org/nonexistent/bundle") - w = httptest.NewRecorder() - - handler.ServeHTTP(w, req) - - assert.Equal(t, 404, w.Code) -} - -func TestBundleInterval(t *testing.T) { - _, ctx := logging.Configure(context.Background(), logging.Config{}) - tmpDir := t.TempDir() - - tests := []struct { - name string - bundleInterval time.Duration - expectDefault bool - }{ - { - name: "CustomInterval", - bundleInterval: 1 * time.Hour, - expectDefault: false, - }, - { - name: "DefaultInterval", - bundleInterval: 0, - expectDefault: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - memCache, err := cache.NewMemory(ctx, cache.MemoryConfig{}) - assert.NoError(t, err) - mux := newTestMux() - - cloneManager := gitclone.NewManagerProvider(ctx, gitclone.Config{ - MirrorRoot: tmpDir, - }, nil) - - s, err := git.New(ctx, git.Config{ - BundleInterval: tt.bundleInterval, - }, jobscheduler.New(ctx, jobscheduler.Config{}), memCache, mux, cloneManager, func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil - assert.NoError(t, err) - assert.NotZero(t, s) - - // Strategy should be created successfully regardless of bundle interval - }) - } -} diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index 57cffc3..5f2dc32 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -28,13 +28,12 @@ import ( ) func Register(r *strategy.Registry, scheduler jobscheduler.Scheduler, cloneManagerProvider gitclone.ManagerProvider, tokenManagerProvider githubapp.TokenManagerProvider) { - strategy.Register(r, "git", "Caches Git repositories, including bundle and tarball snapshots.", func(ctx context.Context, config Config, cache cache.Cache, mux strategy.Mux) (*Strategy, error) { + strategy.Register(r, "git", "Caches Git repositories, including tarball snapshots.", func(ctx context.Context, config Config, cache cache.Cache, mux strategy.Mux) (*Strategy, error) { return New(ctx, config, scheduler, cache, mux, cloneManagerProvider, tokenManagerProvider) }) } type Config struct { - BundleInterval time.Duration `hcl:"bundle-interval,optional" help:"How often to generate bundles. 0 disables bundling." default:"0"` SnapshotInterval time.Duration `hcl:"snapshot-interval,optional" help:"How often to generate tar.zstd snapshots. 0 disables snapshots." default:"0"` } @@ -98,9 +97,6 @@ func New( slog.String("error", err.Error())) } for _, repo := range existing { - if s.config.BundleInterval > 0 { - s.scheduleBundleJobs(repo) - } if s.config.SnapshotInterval > 0 { s.scheduleSnapshotJobs(repo) } @@ -140,7 +136,6 @@ func New( mux.Handle("POST /git/{host}/{path...}", http.HandlerFunc(s.handleRequest)) logger.InfoContext(ctx, "Git strategy initialized", - "bundle_interval", config.BundleInterval, "snapshot_interval", config.SnapshotInterval) return s, nil @@ -169,11 +164,6 @@ func (s *Strategy) handleRequest(w http.ResponseWriter, r *http.Request) { slog.String("host", host), slog.String("path", pathValue)) - if strings.HasSuffix(pathValue, "/bundle") { - s.handleBundleRequest(w, r, host, pathValue) - return - } - if strings.HasSuffix(pathValue, "/snapshot") { s.handleSnapshotRequest(w, r, host, pathValue) return @@ -349,10 +339,6 @@ func ExtractRepoPath(pathValue string) string { return repoPath } -func (s *Strategy) handleBundleRequest(w http.ResponseWriter, r *http.Request, host, pathValue string) { - s.serveCachedArtifact(w, r, host, pathValue, "bundle") -} - func (s *Strategy) serveCachedArtifact(w http.ResponseWriter, r *http.Request, host, pathValue, artifact string) { ctx := r.Context() logger := logging.FromContext(ctx) @@ -420,10 +406,6 @@ func (s *Strategy) startClone(ctx context.Context, repo *gitclone.Repository) { slog.String("upstream", repo.UpstreamURL()), slog.String("path", repo.Path())) - if s.config.BundleInterval > 0 { - s.scheduleBundleJobs(repo) - } - if s.config.SnapshotInterval > 0 { s.scheduleSnapshotJobs(repo) } @@ -457,9 +439,3 @@ func (s *Strategy) backgroundFetch(ctx context.Context, repo *gitclone.Repositor slog.String("error", err.Error())) } } - -func (s *Strategy) scheduleBundleJobs(repo *gitclone.Repository) { - s.scheduler.SubmitPeriodicJob(repo.UpstreamURL(), "bundle-periodic", s.config.BundleInterval, func(ctx context.Context) error { - return s.generateAndUploadBundle(ctx, repo) - }) -}