diff --git a/internal/gitclone/manager.go b/internal/gitclone/manager.go index 06f0609..576904b 100644 --- a/internal/gitclone/manager.go +++ b/internal/gitclone/manager.go @@ -694,21 +694,35 @@ func (r *Repository) GetUpstreamRefs(ctx context.Context) (map[string]string, er return ParseGitRefs(output), nil } +// repackTimeout bounds `git repack` so a slow repack on a large repository +// cannot block the scheduler queue indefinitely. +const repackTimeout = 10 * time.Minute + +// Repack consolidates pack files using geometric repacking. Unlike a full +// repack (-a), geometric repacking only merges packs when there is significant +// fragmentation (many small packs), making it orders of magnitude faster on +// large repositories in steady state. The --write-midx and --write-bitmap-index +// flags maintain the multi-pack index and reachability bitmaps for efficient +// serving via git http-backend. func (r *Repository) Repack(ctx context.Context) error { - r.mu.RLock() - defer r.mu.RUnlock() - logger := logging.FromContext(ctx) - logger.InfoContext(ctx, "Full repack started", "upstream", r.upstreamURL) + logger.InfoContext(ctx, "Geometric repack started", "upstream", r.upstreamURL) + + repackCtx, cancel := context.WithTimeout(ctx, repackTimeout) + defer cancel() // #nosec G204 - r.path is controlled by us - cmd := exec.CommandContext(ctx, "git", "-C", r.path, "repack", "-adb", "--write-midx", "--write-bitmap-index") + cmd := exec.CommandContext(repackCtx, "git", "-C", r.path, "repack", "-d", "--geometric=2", "--write-midx", "--write-bitmap-index") + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + cmd.Cancel = func() error { + return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + } output, err := cmd.CombinedOutput() if err != nil { return errors.Wrapf(err, "git repack: %s", string(output)) } - logger.InfoContext(ctx, "Full repack completed", "upstream", r.upstreamURL) + logger.InfoContext(ctx, "Geometric repack completed", "upstream", r.upstreamURL) return nil } diff --git a/internal/strategy/git/backend.go b/internal/strategy/git/backend.go index cd31b84..4c70d17 100644 --- a/internal/strategy/git/backend.go +++ b/internal/strategy/git/backend.go @@ -180,17 +180,13 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, repo return false } -func (s *Strategy) ensureRefsUpToDate(ctx context.Context, repo *gitclone.Repository) error { +// checkRefsStale checks whether the local mirror's refs are behind upstream. +// Returns true if a fetch is needed. The caller decides whether to fetch +// synchronously or fall back to upstream. +func (s *Strategy) checkRefsStale(ctx context.Context, repo *gitclone.Repository) (bool, error) { needsFetch, err := repo.EnsureRefsUpToDate(ctx) if err != nil { - return errors.Wrap(err, "check upstream refs") + return false, errors.Wrap(err, "check upstream refs") } - if needsFetch { - logger := logging.FromContext(ctx) - logger.DebugContext(ctx, "Refs stale, fetching synchronously", "upstream", repo.UpstreamURL()) - if err := s.backgroundFetch(ctx, repo); err != nil { - logger.WarnContext(ctx, "Synchronous fetch failed", "upstream", repo.UpstreamURL(), "error", err) - } - } - return nil + return needsFetch, nil } diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index 45c7ba2..f719bb3 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -261,10 +261,18 @@ func (s *Strategy) serveReadyRepo(w http.ResponseWriter, r *http.Request, repo * ctx := r.Context() logger := logging.FromContext(ctx) - if isInfoRefs { - if err := s.ensureRefsUpToDate(ctx, repo); err != nil { - logger.WarnContext(ctx, "Failed to check upstream refs", "error", err) - } + stale, err := s.checkRefsStale(ctx, repo) + if err != nil { + logger.WarnContext(ctx, "Failed to check upstream refs", "upstream", repo.UpstreamURL(), "error", err) + } + if isInfoRefs && stale { + // Mirror is behind upstream. Forward to upstream so the client gets + // fresh refs immediately, and kick off a background fetch so the + // mirror catches up for subsequent requests. + logger.InfoContext(ctx, "Refs stale, forwarding to upstream and fetching in background", "upstream", repo.UpstreamURL()) + s.submitFetch(repo) + s.forwardToUpstream(w, r, host, pathValue) + return } s.maybeBackgroundFetch(repo) @@ -554,19 +562,20 @@ func (s *Strategy) maybeBackgroundFetch(repo *gitclone.Repository) { if !repo.NeedsFetch(s.cloneManager.Config().FetchInterval) { return } + s.submitFetch(repo) +} +// submitFetch schedules a fetch unconditionally. Use this when ls-remote has +// already confirmed the mirror is behind upstream. +func (s *Strategy) submitFetch(repo *gitclone.Repository) { // Use a separate queue from snapshot/repack so fetches are not serialized // behind long-running jobs on the same upstream URL queue. s.scheduler.Submit(repo.UpstreamURL()+"/fetch", "fetch", func(ctx context.Context) error { - return s.backgroundFetch(ctx, repo) + return s.doFetch(ctx, repo) }) } -func (s *Strategy) backgroundFetch(ctx context.Context, repo *gitclone.Repository) error { - if !repo.NeedsFetch(s.cloneManager.Config().FetchInterval) { - return nil - } - +func (s *Strategy) doFetch(ctx context.Context, repo *gitclone.Repository) error { logger := logging.FromContext(ctx) logger.InfoContext(ctx, "Fetching updates", "upstream", repo.UpstreamURL(), "path", repo.Path()) diff --git a/internal/strategy/git/snapshot.go b/internal/strategy/git/snapshot.go index fc87432..45079e8 100644 --- a/internal/strategy/git/snapshot.go +++ b/internal/strategy/git/snapshot.go @@ -190,8 +190,15 @@ func (s *Strategy) handleSnapshotRequest(w http.ResponseWriter, r *http.Request, http.Error(w, "Repository unavailable", http.StatusServiceUnavailable) return } - if err := s.ensureRefsUpToDate(ctx, repo); err != nil { - logger.WarnContext(ctx, "Failed to check upstream refs for snapshot", "upstream", upstreamURL, "error", err) + refsStale, err := s.checkRefsStale(ctx, repo) + if err != nil { + logger.WarnContext(ctx, "Failed to check upstream refs", "upstream", upstreamURL, "error", err) + } + if refsStale { + logger.InfoContext(ctx, "Refs stale for snapshot request, fetching", "upstream", upstreamURL) + if err := repo.Fetch(ctx); err != nil { + logger.WarnContext(ctx, "Fetch for snapshot failed", "upstream", upstreamURL, "error", err) + } } cacheKey := snapshotCacheKey(upstreamURL)