From 55a2107d7056f0dbc2f6ba345d8d5a82d4d3cf61 Mon Sep 17 00:00:00 2001 From: Elizabeth Worstell Date: Thu, 12 Mar 2026 22:27:47 -0700 Subject: [PATCH] fix: prevent concurrent snapshot restores from corrupting packed-refs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ensureCloneReady allowed multiple concurrent goroutines to enter startClone simultaneously. Each goroutine extracted the mirror snapshot tarball into the same directory, corrupting packed-refs when one goroutine's tar overwrote the file while another was running git fetch. Add TryStartCloning() to atomically gate the StateEmpty→StateCloning transition so only the winning goroutine performs the restore or clone. Update MarkRestored and Clone to accept StateCloning (since the caller already transitioned) so the fallback from failed restore to fresh clone still works. Amp-Thread-ID: https://ampcode.com/threads/T-019ce597-b4ff-72ad-b096-d2851a7058ff Co-authored-by: Amp --- internal/gitclone/manager.go | 31 +++++++++++++++++++------------ internal/strategy/git/git.go | 8 ++++++++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/internal/gitclone/manager.go b/internal/gitclone/manager.go index f2b48c3..0b1b20b 100644 --- a/internal/gitclone/manager.go +++ b/internal/gitclone/manager.go @@ -320,14 +320,26 @@ func WithReadLockReturn[T any](repo *Repository, fn func() (T, error)) (T, error return fn() } -// MarkRestored configures a restored snapshot (e.g. from S3) as a mirror and -// leaves the repository in StateCloning. The caller must call MarkReady after -// a successful catch-up fetch to transition to StateReady. While in -// StateCloning, requests are proxied to upstream instead of served from the -// potentially-stale local mirror. -func (r *Repository) MarkRestored(ctx context.Context) error { +// TryStartCloning atomically transitions the repository from StateEmpty to +// StateCloning. Returns true if this goroutine won the transition and should +// proceed with the clone/restore; false if another goroutine already claimed it. +func (r *Repository) TryStartCloning() bool { r.mu.Lock() + defer r.mu.Unlock() if r.state != StateEmpty { + return false + } + r.state = StateCloning + return true +} + +// MarkRestored configures a restored snapshot (e.g. from S3) as a mirror. +// The caller must have already transitioned to StateCloning (via +// TryStartCloning) before extracting the snapshot. On error the state is +// left as StateCloning so the caller can fall back to a fresh clone. +func (r *Repository) MarkRestored(ctx context.Context) error { + r.mu.Lock() + if r.state == StateReady { r.mu.Unlock() return nil } @@ -338,14 +350,9 @@ func (r *Repository) MarkRestored(ctx context.Context) error { if err == nil && r.config.Maintenance { err = registerMaintenance(ctx, r.path) } - - r.mu.Lock() if err != nil { - r.state = StateEmpty - r.mu.Unlock() return errors.Wrap(err, "configure mirror after restore") } - r.mu.Unlock() return nil } @@ -360,7 +367,7 @@ func (r *Repository) MarkReady() { func (r *Repository) Clone(ctx context.Context) error { r.mu.Lock() - if r.state != StateEmpty { + if r.state == StateReady { r.mu.Unlock() return nil } diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index 578cfea..7424588 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -443,6 +443,14 @@ func (s *Strategy) ensureCloneReady(ctx context.Context, repo *gitclone.Reposito } func (s *Strategy) startClone(ctx context.Context, repo *gitclone.Repository) { + // Atomically claim the clone so only one goroutine performs the restore + // or clone. Without this gate, concurrent snapshot requests each call + // startClone and extract tarballs over the same directory, corrupting + // packed-refs and other git metadata. + if !repo.TryStartCloning() { + return + } + logger := logging.FromContext(ctx) upstream := repo.UpstreamURL()