Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions internal/gitclone/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -694,21 +694,35 @@ func (r *Repository) GetUpstreamRefs(ctx context.Context) (map[string]string, er
return ParseGitRefs(output), nil
}

// repackTimeout bounds `git repack` so a slow repack on a large repository
// cannot block the scheduler queue indefinitely.
const repackTimeout = 10 * time.Minute

// Repack consolidates pack files using geometric repacking. Unlike a full
// repack (-a), geometric repacking only merges packs when there is significant
// fragmentation (many small packs), making it orders of magnitude faster on
// large repositories in steady state. The --write-midx and --write-bitmap-index
// flags maintain the multi-pack index and reachability bitmaps for efficient
// serving via git http-backend.
func (r *Repository) Repack(ctx context.Context) error {
r.mu.RLock()
defer r.mu.RUnlock()
Comment on lines -698 to -699
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was the locking removed?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Git handles its own file locking during repack, so this lock wasn't needed.


logger := logging.FromContext(ctx)
logger.InfoContext(ctx, "Full repack started", "upstream", r.upstreamURL)
logger.InfoContext(ctx, "Geometric repack started", "upstream", r.upstreamURL)

repackCtx, cancel := context.WithTimeout(ctx, repackTimeout)
defer cancel()

// #nosec G204 - r.path is controlled by us
cmd := exec.CommandContext(ctx, "git", "-C", r.path, "repack", "-adb", "--write-midx", "--write-bitmap-index")
cmd := exec.CommandContext(repackCtx, "git", "-C", r.path, "repack", "-d", "--geometric=2", "--write-midx", "--write-bitmap-index")
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
cmd.Cancel = func() error {
return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems this will always be called at the end (via the defer), but the process might already be gone by then.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cancel only gets invoked while the process is still running, so this shouldn't be an issue.

}
output, err := cmd.CombinedOutput()
if err != nil {
return errors.Wrapf(err, "git repack: %s", string(output))
}

logger.InfoContext(ctx, "Full repack completed", "upstream", r.upstreamURL)
logger.InfoContext(ctx, "Geometric repack completed", "upstream", r.upstreamURL)
return nil
}

Expand Down
16 changes: 6 additions & 10 deletions internal/strategy/git/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,17 +180,13 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, repo
return false
}

func (s *Strategy) ensureRefsUpToDate(ctx context.Context, repo *gitclone.Repository) error {
// checkRefsStale checks whether the local mirror's refs are behind upstream.
// Returns true if a fetch is needed. The caller decides whether to fetch
// synchronously or fall back to upstream.
func (s *Strategy) checkRefsStale(ctx context.Context, repo *gitclone.Repository) (bool, error) {
needsFetch, err := repo.EnsureRefsUpToDate(ctx)
if err != nil {
return errors.Wrap(err, "check upstream refs")
return false, errors.Wrap(err, "check upstream refs")
}
if needsFetch {
logger := logging.FromContext(ctx)
logger.DebugContext(ctx, "Refs stale, fetching synchronously", "upstream", repo.UpstreamURL())
if err := s.backgroundFetch(ctx, repo); err != nil {
logger.WarnContext(ctx, "Synchronous fetch failed", "upstream", repo.UpstreamURL(), "error", err)
}
}
return nil
return needsFetch, nil
}
29 changes: 19 additions & 10 deletions internal/strategy/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,10 +261,18 @@ func (s *Strategy) serveReadyRepo(w http.ResponseWriter, r *http.Request, repo *
ctx := r.Context()
logger := logging.FromContext(ctx)

if isInfoRefs {
if err := s.ensureRefsUpToDate(ctx, repo); err != nil {
logger.WarnContext(ctx, "Failed to check upstream refs", "error", err)
}
stale, err := s.checkRefsStale(ctx, repo)
if err != nil {
logger.WarnContext(ctx, "Failed to check upstream refs", "upstream", repo.UpstreamURL(), "error", err)
}
if isInfoRefs && stale {
// Mirror is behind upstream. Forward to upstream so the client gets
// fresh refs immediately, and kick off a background fetch so the
// mirror catches up for subsequent requests.
logger.InfoContext(ctx, "Refs stale, forwarding to upstream and fetching in background", "upstream", repo.UpstreamURL())
s.submitFetch(repo)
s.forwardToUpstream(w, r, host, pathValue)
return
}
s.maybeBackgroundFetch(repo)

Expand Down Expand Up @@ -554,19 +562,20 @@ func (s *Strategy) maybeBackgroundFetch(repo *gitclone.Repository) {
if !repo.NeedsFetch(s.cloneManager.Config().FetchInterval) {
return
}
s.submitFetch(repo)
}

// submitFetch schedules a fetch unconditionally. Use this when ls-remote has
// already confirmed the mirror is behind upstream.
func (s *Strategy) submitFetch(repo *gitclone.Repository) {
// Use a separate queue from snapshot/repack so fetches are not serialized
// behind long-running jobs on the same upstream URL queue.
s.scheduler.Submit(repo.UpstreamURL()+"/fetch", "fetch", func(ctx context.Context) error {
return s.backgroundFetch(ctx, repo)
return s.doFetch(ctx, repo)
})
}

func (s *Strategy) backgroundFetch(ctx context.Context, repo *gitclone.Repository) error {
if !repo.NeedsFetch(s.cloneManager.Config().FetchInterval) {
return nil
}

func (s *Strategy) doFetch(ctx context.Context, repo *gitclone.Repository) error {
logger := logging.FromContext(ctx)
logger.InfoContext(ctx, "Fetching updates", "upstream", repo.UpstreamURL(), "path", repo.Path())

Expand Down
11 changes: 9 additions & 2 deletions internal/strategy/git/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,15 @@ func (s *Strategy) handleSnapshotRequest(w http.ResponseWriter, r *http.Request,
http.Error(w, "Repository unavailable", http.StatusServiceUnavailable)
return
}
if err := s.ensureRefsUpToDate(ctx, repo); err != nil {
logger.WarnContext(ctx, "Failed to check upstream refs for snapshot", "upstream", upstreamURL, "error", err)
refsStale, err := s.checkRefsStale(ctx, repo)
if err != nil {
logger.WarnContext(ctx, "Failed to check upstream refs", "upstream", upstreamURL, "error", err)
}
if refsStale {
logger.InfoContext(ctx, "Refs stale for snapshot request, fetching", "upstream", upstreamURL)
if err := repo.Fetch(ctx); err != nil {
logger.WarnContext(ctx, "Fetch for snapshot failed", "upstream", upstreamURL, "error", err)
}
}

cacheKey := snapshotCacheKey(upstreamURL)
Expand Down