From b1131cef68291d6b696ad89a0d6bc63f5ec61f44 Mon Sep 17 00:00:00 2001
From: Elizabeth Worstell
Date: Thu, 12 Mar 2026 22:32:05 -0700
Subject: [PATCH] fix: prevent git fetch from corrupting mirror snapshots

generateAndUploadMirrorSnapshot tars the live bare mirror directory while
git fetch can run concurrently on a separate scheduler queue. When git
fetch replaces packed-refs (via temp file + rename) mid-tar, the archive
captures a truncated file. Restoring this snapshot produces a mirror with
an unterminated packed-refs line, breaking all subsequent git operations.

Add WithFetchExclusion() which holds the repo's fetch semaphore,
preventing concurrent fetches while the tar runs. This ensures the
snapshot captures a consistent view of the mirror directory.

Amp-Thread-ID: https://ampcode.com/threads/T-019ce597-b4ff-72ad-b096-d2851a7058ff
Co-authored-by: Amp
---
 internal/gitclone/manager.go      | 13 +++++++++++++
 internal/strategy/git/snapshot.go |  9 +++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/internal/gitclone/manager.go b/internal/gitclone/manager.go
index 0b1b20b..c06b679 100644
--- a/internal/gitclone/manager.go
+++ b/internal/gitclone/manager.go
@@ -333,6 +333,19 @@ func (r *Repository) TryStartCloning() bool {
 	return true
 }
 
+// WithFetchExclusion runs fn while holding the fetch semaphore, preventing
+// concurrent git fetch operations. Use this for operations like tar that
+// read the repository directory non-atomically and need a consistent view.
+func (r *Repository) WithFetchExclusion(ctx context.Context, fn func() error) error {
+	select {
+	case <-r.fetchSem:
+		defer func() { r.fetchSem <- struct{}{} }()
+		return fn()
+	case <-ctx.Done():
+		return errors.Wrap(ctx.Err(), "context cancelled waiting for fetch exclusion")
+	}
+}
+
 // MarkRestored configures a restored snapshot (e.g. from S3) as a mirror.
 // The caller must have already transitioned to StateCloning (via
 // TryStartCloning) before extracting the snapshot. On error the state is
diff --git a/internal/strategy/git/snapshot.go b/internal/strategy/git/snapshot.go
index a5d757c..fc87432 100644
--- a/internal/strategy/git/snapshot.go
+++ b/internal/strategy/git/snapshot.go
@@ -137,8 +137,13 @@ func (s *Strategy) generateAndUploadMirrorSnapshot(ctx context.Context, repo *gi
 	cacheKey := mirrorSnapshotCacheKey(upstream)
 	excludePatterns := []string{"*.lock"}
 
-	if err := repo.WithReadLock(func() error {
-		return snapshot.Create(ctx, s.cache, cacheKey, repo.Path(), 0, excludePatterns, s.config.ZstdThreads)
+	// Hold the fetch semaphore while tar-ing the bare mirror directory.
+	// Without this, a concurrent git fetch can replace packed-refs mid-read,
+	// causing tar to capture a truncated file.
+	if err := repo.WithFetchExclusion(ctx, func() error {
+		return repo.WithReadLock(func() error {
+			return snapshot.Create(ctx, s.cache, cacheKey, repo.Path(), 0, excludePatterns, s.config.ZstdThreads)
+		})
 	}); err != nil {
 		return errors.Wrap(err, "create mirror snapshot")
 	}