From eb8cf2adae07e0a176f8f14966101c990af65881 Mon Sep 17 00:00:00 2001 From: Elizabeth Worstell Date: Mon, 16 Mar 2026 14:20:13 -0700 Subject: [PATCH 1/3] fix: resolve git fetch/pull returning stale refs Two fixes for git fetch/pull not working from workstations: 1. ensureRefsUpToDate now fetches directly when ls-remote confirms stale refs, bypassing the NeedsFetch time guard that was silently skipping the fetch even after staleness was detected. 2. Switch from full repack (-adb) to geometric repacking (--geometric=2). Full repack recompresses ALL objects and ran 15+ minutes on large repositories, blocking git http-backend. Geometric repacking only merges when small packs accumulate, completing near-instantly in steady state. A 10-minute timeout with process group kill is added as a safety net. Co-authored-by: Amp Amp-Thread-ID: https://ampcode.com/threads/T-019cf844-5ed1-74ae-bc57-27d068380f2a --- internal/gitclone/manager.go | 26 ++++++++++++++++++++------ internal/strategy/git/backend.go | 19 +++++++------------ internal/strategy/git/git.go | 17 +++++++++++++---- internal/strategy/git/snapshot.go | 7 +++++-- 4 files changed, 45 insertions(+), 24 deletions(-) diff --git a/internal/gitclone/manager.go b/internal/gitclone/manager.go index 06f0609..576904b 100644 --- a/internal/gitclone/manager.go +++ b/internal/gitclone/manager.go @@ -694,21 +694,35 @@ func (r *Repository) GetUpstreamRefs(ctx context.Context) (map[string]string, er return ParseGitRefs(output), nil } +// repackTimeout bounds `git repack` so a slow repack on a large repository +// cannot block the scheduler queue indefinitely. +const repackTimeout = 10 * time.Minute + +// Repack consolidates pack files using geometric repacking. Unlike a full +// repack (-a), geometric repacking only merges packs when there is significant +// fragmentation (many small packs), making it orders of magnitude faster on +// large repositories in steady state. The --write-midx and --write-bitmap-index +// flags maintain the multi-pack index and reachability bitmaps for efficient +// serving via git http-backend. func (r *Repository) Repack(ctx context.Context) error { - r.mu.RLock() - defer r.mu.RUnlock() - logger := logging.FromContext(ctx) - logger.InfoContext(ctx, "Full repack started", "upstream", r.upstreamURL) + logger.InfoContext(ctx, "Geometric repack started", "upstream", r.upstreamURL) + + repackCtx, cancel := context.WithTimeout(ctx, repackTimeout) + defer cancel() // #nosec G204 - r.path is controlled by us - cmd := exec.CommandContext(ctx, "git", "-C", r.path, "repack", "-adb", "--write-midx", "--write-bitmap-index") + cmd := exec.CommandContext(repackCtx, "git", "-C", r.path, "repack", "-d", "--geometric=2", "--write-midx", "--write-bitmap-index") + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + cmd.Cancel = func() error { + return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + } output, err := cmd.CombinedOutput() if err != nil { return errors.Wrapf(err, "git repack: %s", string(output)) } - logger.InfoContext(ctx, "Full repack completed", "upstream", r.upstreamURL) + logger.InfoContext(ctx, "Geometric repack completed", "upstream", r.upstreamURL) return nil } diff --git a/internal/strategy/git/backend.go b/internal/strategy/git/backend.go index cd31b84..4bb8db2 100644 --- a/internal/strategy/git/backend.go +++ b/internal/strategy/git/backend.go @@ -11,8 +11,6 @@ import ( "path/filepath" "strings" - "github.com/alecthomas/errors" - "github.com/block/cachew/internal/gitclone" "github.com/block/cachew/internal/httputil" "github.com/block/cachew/internal/logging" @@ -180,17 +178,14 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, repo return false } -func (s *Strategy) ensureRefsUpToDate(ctx context.Context, repo *gitclone.Repository) error { +// checkRefsStale checks whether the local mirror's refs are behind upstream. +// Returns true if a fetch is needed. The caller decides whether to fetch +// synchronously or fall back to upstream. +func (s *Strategy) checkRefsStale(ctx context.Context, repo *gitclone.Repository) bool { needsFetch, err := repo.EnsureRefsUpToDate(ctx) if err != nil { - return errors.Wrap(err, "check upstream refs") - } - if needsFetch { - logger := logging.FromContext(ctx) - logger.DebugContext(ctx, "Refs stale, fetching synchronously", "upstream", repo.UpstreamURL()) - if err := s.backgroundFetch(ctx, repo); err != nil { - logger.WarnContext(ctx, "Synchronous fetch failed", "upstream", repo.UpstreamURL(), "error", err) - } + logging.FromContext(ctx).WarnContext(ctx, "Failed to check upstream refs", "error", err) + return false } - return nil + return needsFetch } diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index 45c7ba2..ef9bbd0 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -261,10 +261,14 @@ func (s *Strategy) serveReadyRepo(w http.ResponseWriter, r *http.Request, repo * ctx := r.Context() logger := logging.FromContext(ctx) - if isInfoRefs { - if err := s.ensureRefsUpToDate(ctx, repo); err != nil { - logger.WarnContext(ctx, "Failed to check upstream refs", "error", err) - } + if isInfoRefs && s.checkRefsStale(ctx, repo) { + // Mirror is behind upstream. Forward to upstream so the client gets + // fresh refs immediately, and kick off a background fetch so the + // mirror catches up for subsequent requests. + logger.InfoContext(ctx, "Refs stale, forwarding to upstream and fetching in background", "upstream", repo.UpstreamURL()) + s.submitFetch(repo) + s.forwardToUpstream(w, r, host, pathValue) + return } s.maybeBackgroundFetch(repo) @@ -554,7 +558,12 @@ func (s *Strategy) maybeBackgroundFetch(repo *gitclone.Repository) { if !repo.NeedsFetch(s.cloneManager.Config().FetchInterval) { return } + s.submitFetch(repo) +} +// submitFetch schedules a fetch unconditionally. Use this when ls-remote has +// already confirmed the mirror is behind upstream. +func (s *Strategy) submitFetch(repo *gitclone.Repository) { // Use a separate queue from snapshot/repack so fetches are not serialized // behind long-running jobs on the same upstream URL queue. s.scheduler.Submit(repo.UpstreamURL()+"/fetch", "fetch", func(ctx context.Context) error { diff --git a/internal/strategy/git/snapshot.go b/internal/strategy/git/snapshot.go index fc87432..9f82229 100644 --- a/internal/strategy/git/snapshot.go +++ b/internal/strategy/git/snapshot.go @@ -190,8 +190,11 @@ func (s *Strategy) handleSnapshotRequest(w http.ResponseWriter, r *http.Request, http.Error(w, "Repository unavailable", http.StatusServiceUnavailable) return } - if err := s.ensureRefsUpToDate(ctx, repo); err != nil { - logger.WarnContext(ctx, "Failed to check upstream refs for snapshot", "upstream", upstreamURL, "error", err) + if s.checkRefsStale(ctx, repo) { + logger.InfoContext(ctx, "Refs stale for snapshot request, fetching", "upstream", upstreamURL) + if err := repo.Fetch(ctx); err != nil { + logger.WarnContext(ctx, "Fetch for snapshot failed", "upstream", upstreamURL, "error", err) + } } cacheKey := snapshotCacheKey(upstreamURL) From f03ae86e2b7c5988f1361444b4a8cbd5b1bfb112 Mon Sep 17 00:00:00 2001 From: Elizabeth Worstell Date: Mon, 16 Mar 2026 15:02:37 -0700 Subject: [PATCH 2/3] fix: bypass NeedsFetch time guard for ls-remote-triggered fetches submitFetch is called when ls-remote has already confirmed the mirror is stale. It was calling backgroundFetch which re-checks NeedsFetch(15m) and silently drops the fetch if one happened recently (e.g. post-restore). This caused the mirror to never catch up, making every request hit the upstream fallback path. Co-authored-by: Amp Amp-Thread-ID: https://ampcode.com/threads/T-019cf8a0-d586-7109-9e69-9cb7d9b27f6c --- internal/strategy/git/git.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index ef9bbd0..6c7f9d4 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -567,15 +567,11 @@ func (s *Strategy) submitFetch(repo *gitclone.Repository) { // Use a separate queue from snapshot/repack so fetches are not serialized // behind long-running jobs on the same upstream URL queue. s.scheduler.Submit(repo.UpstreamURL()+"/fetch", "fetch", func(ctx context.Context) error { - return s.backgroundFetch(ctx, repo) + return s.doFetch(ctx, repo) }) } -func (s *Strategy) backgroundFetch(ctx context.Context, repo *gitclone.Repository) error { - if !repo.NeedsFetch(s.cloneManager.Config().FetchInterval) { - return nil - } - +func (s *Strategy) doFetch(ctx context.Context, repo *gitclone.Repository) error { logger := logging.FromContext(ctx) logger.InfoContext(ctx, "Fetching updates", "upstream", repo.UpstreamURL(), "path", repo.Path()) From 47748c24a639eef94c3ec2933958f2ebb1cfacb8 Mon Sep 17 00:00:00 2001 From: Elizabeth Worstell Date: Mon, 16 Mar 2026 15:57:25 -0700 Subject: [PATCH 3/3] refactor: return error from checkRefsStale, log at callsite Co-authored-by: Amp Amp-Thread-ID: https://ampcode.com/threads/T-019cf8a0-d586-7109-9e69-9cb7d9b27f6c --- internal/strategy/git/backend.go | 9 +++++---- internal/strategy/git/git.go | 6 +++++- internal/strategy/git/snapshot.go | 6 +++++- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/internal/strategy/git/backend.go b/internal/strategy/git/backend.go index 4bb8db2..4c70d17 100644 --- a/internal/strategy/git/backend.go +++ b/internal/strategy/git/backend.go @@ -11,6 +11,8 @@ import ( "path/filepath" "strings" + "github.com/alecthomas/errors" + "github.com/block/cachew/internal/gitclone" "github.com/block/cachew/internal/httputil" "github.com/block/cachew/internal/logging" @@ -181,11 +183,10 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, repo // checkRefsStale checks whether the local mirror's refs are behind upstream. // Returns true if a fetch is needed. The caller decides whether to fetch // synchronously or fall back to upstream. -func (s *Strategy) checkRefsStale(ctx context.Context, repo *gitclone.Repository) bool { +func (s *Strategy) checkRefsStale(ctx context.Context, repo *gitclone.Repository) (bool, error) { needsFetch, err := repo.EnsureRefsUpToDate(ctx) if err != nil { - logging.FromContext(ctx).WarnContext(ctx, "Failed to check upstream refs", "error", err) - return false + return false, errors.Wrap(err, "check upstream refs") } - return needsFetch + return needsFetch, nil } diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index 6c7f9d4..f719bb3 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -261,7 +261,11 @@ func (s *Strategy) serveReadyRepo(w http.ResponseWriter, r *http.Request, repo * ctx := r.Context() logger := logging.FromContext(ctx) - if isInfoRefs && s.checkRefsStale(ctx, repo) { + stale, err := s.checkRefsStale(ctx, repo) + if err != nil { + logger.WarnContext(ctx, "Failed to check upstream refs", "upstream", repo.UpstreamURL(), "error", err) + } + if isInfoRefs && stale { // Mirror is behind upstream. Forward to upstream so the client gets // fresh refs immediately, and kick off a background fetch so the // mirror catches up for subsequent requests. diff --git a/internal/strategy/git/snapshot.go b/internal/strategy/git/snapshot.go index 9f82229..45079e8 100644 --- a/internal/strategy/git/snapshot.go +++ b/internal/strategy/git/snapshot.go @@ -190,7 +190,11 @@ func (s *Strategy) handleSnapshotRequest(w http.ResponseWriter, r *http.Request, http.Error(w, "Repository unavailable", http.StatusServiceUnavailable) return } - if s.checkRefsStale(ctx, repo) { + refsStale, err := s.checkRefsStale(ctx, repo) + if err != nil { + logger.WarnContext(ctx, "Failed to check upstream refs", "upstream", upstreamURL, "error", err) + } + if refsStale { logger.InfoContext(ctx, "Refs stale for snapshot request, fetching", "upstream", upstreamURL) if err := repo.Fetch(ctx); err != nil { logger.WarnContext(ctx, "Fetch for snapshot failed", "upstream", upstreamURL, "error", err)