diff --git a/internal/gitclone/manager.go b/internal/gitclone/manager.go
index 79de399..8834ae8 100644
--- a/internal/gitclone/manager.go
+++ b/internal/gitclone/manager.go
@@ -259,6 +259,16 @@ func (r *Repository) Clone(ctx context.Context, config Config) error {
 	return nil
 }
 
+// gitConfigArgs returns the "-c key=value" arguments that apply config's HTTP
+// tuning to one git invocation; LowSpeedTime is truncated to whole seconds.
+func gitConfigArgs(config GitTuningConfig) []string {
+	return []string{
+		"-c", "http.postBuffer=" + strconv.Itoa(config.PostBuffer),
+		"-c", "http.lowSpeedLimit=" + strconv.Itoa(config.LowSpeedLimit),
+		"-c", "http.lowSpeedTime=" + strconv.Itoa(int(config.LowSpeedTime.Seconds())),
+	}
+}
+
 func (r *Repository) executeClone(ctx context.Context, config Config) error {
 	if err := os.MkdirAll(filepath.Dir(r.path), 0o750); err != nil {
 		return errors.Wrap(err, "create clone directory")
@@ -269,11 +279,8 @@ func (r *Repository) executeClone(ctx context.Context, config Config) error {
 	if config.CloneDepth > 0 {
 		args = append(args, "--depth", strconv.Itoa(config.CloneDepth))
 	}
-	args = append(args,
-		"-c", "http.postBuffer="+strconv.Itoa(config.GitConfig.PostBuffer),
-		"-c", "http.lowSpeedLimit="+strconv.Itoa(config.GitConfig.LowSpeedLimit),
-		"-c", "http.lowSpeedTime="+strconv.Itoa(int(config.GitConfig.LowSpeedTime.Seconds())),
-		r.upstreamURL, r.path)
+	args = append(args, gitConfigArgs(config.GitConfig)...)
+	args = append(args, r.upstreamURL, r.path)
 
 	cmd, err := gitCommand(ctx, r.upstreamURL, args...)
 	if err != nil {
@@ -291,11 +298,10 @@ func (r *Repository) executeClone(ctx context.Context, config Config) error {
 		return errors.Wrapf(err, "configure fetch refspec: %s", string(output))
 	}
 
-	cmd, err = gitCommand(ctx, r.upstreamURL, "-C", r.path,
-		"-c", "http.postBuffer="+strconv.Itoa(config.GitConfig.PostBuffer),
-		"-c", "http.lowSpeedLimit="+strconv.Itoa(config.GitConfig.LowSpeedLimit),
-		"-c", "http.lowSpeedTime="+strconv.Itoa(int(config.GitConfig.LowSpeedTime.Seconds())),
-		"fetch", "--all")
+	args = []string{"-C", r.path}
+	args = append(args, gitConfigArgs(config.GitConfig)...)
+	args = append(args, "fetch", "--all")
+	cmd, err = gitCommand(ctx, r.upstreamURL, args...)
 	if err != nil {
 		return errors.Wrap(err, "create git command for fetch")
 	}
@@ -328,11 +334,10 @@ func (r *Repository) Fetch(ctx context.Context, config Config) error {
 	r.mu.Lock()
 
 	// #nosec G204 - r.path is controlled by us
-	cmd, err := gitCommand(ctx, r.upstreamURL, "-C", r.path,
-		"-c", "http.postBuffer="+strconv.Itoa(config.GitConfig.PostBuffer),
-		"-c", "http.lowSpeedLimit="+strconv.Itoa(config.GitConfig.LowSpeedLimit),
-		"-c", "http.lowSpeedTime="+strconv.Itoa(int(config.GitConfig.LowSpeedTime.Seconds())),
-		"remote", "update", "--prune")
+	args := []string{"-C", r.path}
+	args = append(args, gitConfigArgs(config.GitConfig)...)
+	args = append(args, "remote", "update", "--prune")
+	cmd, err := gitCommand(ctx, r.upstreamURL, args...)
 	if err != nil {
 		return errors.Wrap(err, "create git command")
 	}
@@ -429,3 +434,258 @@ func (r *Repository) GetUpstreamRefs(ctx context.Context) (map[string]string, er
 
 	return ParseGitRefs(output), nil
 }
+
+// CommitError describes why a commit could not be made available locally.
+type CommitError struct {
+	SHA        string
+	NotFound   bool  // doesn't exist anywhere (locally or upstream)
+	NotFetched bool  // exists upstream but not locally
+	Err        error // underlying error
+}
+
+// Error implements the error interface. NotFound takes precedence over
+// NotFetched, and a failed fetch reports its underlying cause when one is set.
+func (e *CommitError) Error() string {
+	switch {
+	case e.NotFound:
+		return "commit " + e.SHA + " not found"
+	case e.NotFetched && e.Err != nil:
+		return "commit " + e.SHA + " exists upstream but not fetched locally: " + e.Err.Error()
+	case e.NotFetched:
+		return "commit " + e.SHA + " exists upstream but not fetched locally"
+	case e.Err != nil:
+		return "commit " + e.SHA + ": " + e.Err.Error()
+	default:
+		return "commit " + e.SHA + ": unknown error"
+	}
+}
+
+// Unwrap returns the underlying error so errors.Is and errors.As can see it.
+func (e *CommitError) Unwrap() error {
+	return e.Err
+}
+
+// checkRevisionArg rejects revision arguments that are empty or that git would
+// parse as a command-line option rather than as an object or ref name.
+func checkRevisionArg(rev string) error {
+	if rev == "" || strings.HasPrefix(rev, "-") {
+		return errors.New("invalid revision " + strconv.Quote(rev))
+	}
+	return nil
+}
+
+// HasCommit reports whether the object named by sha exists in the local clone.
+// It fails if the repository is not ready or if git exits with anything other
+// than status 1, which is treated as "object absent".
+func (r *Repository) HasCommit(ctx context.Context, sha string) (bool, error) {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	if r.state != StateReady {
+		return false, errors.New("repository not ready")
+	}
+	if err := checkRevisionArg(sha); err != nil {
+		return false, err
+	}
+
+	// #nosec G204 - r.path is controlled by us
+	cmd := exec.CommandContext(ctx, "git", "-C", r.path, "cat-file", "-e", sha)
+	if err := cmd.Run(); err != nil {
+		var exitErr *exec.ExitError
+		if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 {
+			return false, nil
+		}
+		return false, errors.Wrap(err, "git cat-file")
+	}
+
+	return true, nil
+}
+
+// CommitExistsUpstream reports whether sha is the object ID of any ref
+// advertised by upstream, including the peeled targets of annotated tags.
+//
+// ls-remote patterns filter on ref names, not object IDs, so sha cannot be
+// passed as a pattern; every advertised ref is listed and its object-ID column
+// compared instead. Commits that are not the tip of an advertised ref are
+// reported as absent.
+func (r *Repository) CommitExistsUpstream(ctx context.Context, sha string) (bool, error) {
+	// #nosec G204 - r.upstreamURL is controlled by us
+	cmd, err := gitCommand(ctx, r.upstreamURL, "ls-remote", r.upstreamURL)
+	if err != nil {
+		return false, errors.Wrap(err, "create git command")
+	}
+
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return false, errors.Wrapf(err, "git ls-remote: %s", string(output))
+	}
+
+	// Ref lines look like "<object-id>\t<ref-name>". Matching the first of
+	// exactly two fields also skips any stderr text in the combined output.
+	for _, line := range strings.Split(string(output), "\n") {
+		fields := strings.Fields(line)
+		if len(fields) == 2 && strings.EqualFold(fields[0], sha) {
+			return true, nil
+		}
+	}
+
+	return false, nil
+}
+
+// FetchCommit fetches sha from origin into the local clone. On a shallow
+// clone it first tries a depth-1 fetch; if git rejects that with a message
+// mentioning "shallow", the clone is unshallowed and the fetch retried.
+func (r *Repository) FetchCommit(ctx context.Context, sha string, config Config) error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.state != StateReady {
+		return errors.New("repository not ready")
+	}
+	if err := checkRevisionArg(sha); err != nil {
+		return err
+	}
+
+	shallowFile := filepath.Join(r.path, ".git", "shallow")
+	_, statErr := os.Stat(shallowFile)
+	isShallow := statErr == nil
+
+	// #nosec G204 - r.path is controlled by us
+	args := []string{"-C", r.path}
+	args = append(args, gitConfigArgs(config.GitConfig)...)
+	args = append(args, "fetch", "origin", sha)
+
+	// For shallow clones, we need to use --depth or --deepen to fetch specific commits
+	if isShallow {
+		args = append(args, "--depth=1")
+	}
+
+	cmd, err := gitCommand(ctx, r.upstreamURL, args...)
+	if err != nil {
+		return errors.Wrap(err, "create git command")
+	}
+
+	output, err := cmd.CombinedOutput()
+	if err == nil {
+		return nil
+	}
+	if !isShallow || !strings.Contains(string(output), "shallow") {
+		return errors.Wrapf(err, "git fetch commit %s: %s", sha, string(output))
+	}
+
+	// Unshallow and retry without releasing r.mu, so no other operation can
+	// run against the repository between the two steps.
+	if unshallowErr := r.unshallowLocked(ctx, config); unshallowErr != nil {
+		return errors.Wrapf(err, "git fetch commit %s failed and unshallow also failed: %v: %s", sha, unshallowErr, string(output))
+	}
+
+	args = []string{"-C", r.path}
+	args = append(args, gitConfigArgs(config.GitConfig)...)
+	args = append(args, "fetch", "origin", sha)
+	cmd, err = gitCommand(ctx, r.upstreamURL, args...)
+	if err != nil {
+		return errors.Wrap(err, "create git command for retry")
+	}
+	if output, err = cmd.CombinedOutput(); err != nil {
+		return errors.Wrapf(err, "git fetch commit %s after unshallow: %s", sha, string(output))
+	}
+
+	return nil
+}
+
+// ResolveCommit makes sure sha is available in the local clone, fetching it
+// from upstream if needed. Every failure is returned as a *CommitError:
+// NotFound when upstream does not advertise sha, NotFetched when the fetch
+// itself failed, and neither flag when a lookup could not be performed.
+func (r *Repository) ResolveCommit(ctx context.Context, sha string, config Config) error {
+	hasCommit, err := r.HasCommit(ctx, sha)
+	if err != nil {
+		return &CommitError{SHA: sha, Err: err}
+	}
+
+	if hasCommit {
+		return nil
+	}
+
+	existsUpstream, err := r.CommitExistsUpstream(ctx, sha)
+	if err != nil {
+		return &CommitError{SHA: sha, Err: err}
+	}
+
+	if !existsUpstream {
+		return &CommitError{SHA: sha, NotFound: true}
+	}
+
+	if err := r.FetchCommit(ctx, sha, config); err != nil {
+		return &CommitError{SHA: sha, NotFetched: true, Err: err}
+	}
+
+	return nil
+}
+
+// FetchRef fetches ref from origin into the local clone.
+func (r *Repository) FetchRef(ctx context.Context, ref string, config Config) error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.state != StateReady {
+		return errors.New("repository not ready")
+	}
+	if err := checkRevisionArg(ref); err != nil {
+		return err
+	}
+
+	// #nosec G204 - r.path is controlled by us
+	args := []string{"-C", r.path}
+	args = append(args, gitConfigArgs(config.GitConfig)...)
+	args = append(args, "fetch", "origin", ref)
+	cmd, err := gitCommand(ctx, r.upstreamURL, args...)
+	if err != nil {
+		return errors.Wrap(err, "create git command")
+	}
+
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return errors.Wrapf(err, "git fetch ref %s: %s", ref, string(output))
+	}
+
+	return nil
+}
+
+// Unshallow converts a shallow clone into a full one by fetching the missing
+// history. It does nothing when the clone has no .git/shallow file.
+func (r *Repository) Unshallow(ctx context.Context, config Config) error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if r.state != StateReady {
+		return errors.New("repository not ready")
+	}
+
+	return r.unshallowLocked(ctx, config)
+}
+
+// unshallowLocked is the body of Unshallow. The caller must hold r.mu for
+// writing and must already have checked that the repository is ready.
+func (r *Repository) unshallowLocked(ctx context.Context, config Config) error {
+	shallowFile := filepath.Join(r.path, ".git", "shallow")
+	if _, err := os.Stat(shallowFile); errors.Is(err, os.ErrNotExist) {
+		return nil
+	}
+
+	// #nosec G204 - r.path is controlled by us
+	args := []string{"-C", r.path}
+	args = append(args, gitConfigArgs(config.GitConfig)...)
+	args = append(args, "fetch", "--unshallow")
+	cmd, err := gitCommand(ctx, r.upstreamURL, args...)
+	if err != nil {
+		return errors.Wrap(err, "create git command")
+	}
+
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return errors.Wrapf(err, "git fetch --unshallow: %s", string(output))
+	}
+
+	return nil
+}
diff --git a/internal/gitclone/manager_test.go b/internal/gitclone/manager_test.go
index 6ad97df..b238f4d 100644
--- a/internal/gitclone/manager_test.go
+++ b/internal/gitclone/manager_test.go
@@ -2,8 +2,11 @@ package gitclone //nolint:testpackage // white-box testing required for unexport
 
 import (
 	"context"
+	"errors"
 	"os"
+	"os/exec"
 	"path/filepath"
+	"strings"
 	"testing"
 	"time"
 
@@ -214,3 +217,163 @@ func TestState_String(t *testing.T) {
 	assert.Equal(t, "cloning", StateCloning.String())
 	assert.Equal(t, "ready", StateReady.String())
 }
+
+func TestCommitError_Error(t *testing.T) {
+	tests := []struct {
+		name     string
+		err      *CommitError
+		expected string
+	}{
+		{
+			name: "not found",
+			err: &CommitError{
+				SHA:      "abc123",
+				NotFound: true,
+			},
+			expected: "commit abc123 not found",
+		},
+		{
+			name: "not fetched",
+			err: &CommitError{
+				SHA:        "def456",
+				NotFetched: true,
+			},
+			expected: "commit def456 exists upstream but not fetched locally",
+		},
+		{
+			name: "with underlying error",
+			err: &CommitError{
+				SHA: "xyz789",
+				Err: errors.New("network timeout"),
+			},
+			expected: "commit xyz789: network timeout",
+		},
+		{
+			name: "not fetched with cause",
+			err: &CommitError{
+				SHA:        "def456",
+				NotFetched: true,
+				Err:        errors.New("network timeout"),
+			},
+			expected: "commit def456 exists upstream but not fetched locally: network timeout",
+		},
+		{
+			name:     "no details",
+			err:      &CommitError{SHA: "abc123"},
+			expected: "commit abc123: unknown error",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.expected, tt.err.Error())
+		})
+	}
+}
+
+func TestRepository_HasCommit_NotReady(t *testing.T) {
+	repo := &Repository{
+		state:       StateEmpty,
+		path:        "/tmp/test",
+		upstreamURL: "https://github.com/user/repo",
+		fetchSem:    make(chan struct{}, 1),
+	}
+	repo.fetchSem <- struct{}{}
+
+	hasCommit, err := repo.HasCommit(context.Background(), "abc123")
+	assert.Error(t, err)
+	assert.False(t, hasCommit)
+	assert.Contains(t, err.Error(), "repository not ready")
+}
+
+func TestRepository_FetchCommit_MissingCommit(t *testing.T) {
+	tmpDir := t.TempDir()
+	upstreamPath := filepath.Join(tmpDir, "upstream")
+	localPath := filepath.Join(tmpDir, "local")
+
+	// Create upstream repository with initial commit
+	assert.NoError(t, os.MkdirAll(upstreamPath, 0o755))
+	cmd := exec.Command("git", "init", upstreamPath)
+	output, err := cmd.CombinedOutput()
+	assert.NoError(t, err, "git init upstream failed: %s", string(output))
+
+	cmd = exec.Command("git", "-C", upstreamPath, "config", "user.email", "test@example.com")
+	output, err = cmd.CombinedOutput()
+	assert.NoError(t, err, "git config failed: %s", string(output))
+
+	cmd = exec.Command("git", "-C", upstreamPath, "config", "user.name", "Test User")
+	output, err = cmd.CombinedOutput()
+	assert.NoError(t, err, "git config failed: %s", string(output))
+
+	// Create initial commit
+	testFile := filepath.Join(upstreamPath, "file1.txt")
+	assert.NoError(t, os.WriteFile(testFile, []byte("content 1"), 0o644))
+	cmd = exec.Command("git", "-C", upstreamPath, "add", "file1.txt")
+	output, err = cmd.CombinedOutput()
+	assert.NoError(t, err, "git add failed: %s", string(output))
+
+	cmd = exec.Command("git", "-C", upstreamPath, "commit", "-m", "Initial commit")
+	output, err = cmd.CombinedOutput()
+	assert.NoError(t, err, "git commit failed: %s", string(output))
+
+	// Clone the repository (this represents our cache)
+	cmd = exec.Command("git", "clone", upstreamPath, localPath)
+	output, err = cmd.CombinedOutput()
+	assert.NoError(t, err, "git clone failed: %s", string(output))
+
+	// Now add a NEW commit to upstream (after the clone, so local doesn't have it)
+	testFile2 := filepath.Join(upstreamPath, "file2.txt")
+	assert.NoError(t, os.WriteFile(testFile2, []byte("content 2"), 0o644))
+	cmd = exec.Command("git", "-C", upstreamPath, "add", "file2.txt")
+	output, err = cmd.CombinedOutput()
+	assert.NoError(t, err, "git add failed: %s", string(output))
+
+	cmd = exec.Command("git", "-C", upstreamPath, "commit", "-m", "New commit")
+	output, err = cmd.CombinedOutput()
+	assert.NoError(t, err, "git commit failed: %s", string(output))
+
+	// Get the SHA of the new commit
+	cmd = exec.Command("git", "-C", upstreamPath, "rev-parse", "HEAD")
+	output, err = cmd.CombinedOutput()
+	assert.NoError(t, err, "git rev-parse failed: %s", string(output))
+	newCommitSHA := strings.TrimSpace(string(output))
+
+	// Create a Repository instance
+	repo := &Repository{
+		state:       StateReady,
+		path:        localPath,
+		upstreamURL: upstreamPath,
+		fetchSem:    make(chan struct{}, 1),
+	}
+	repo.fetchSem <- struct{}{}
+
+	// Verify the new commit is NOT available locally yet
+	hasCommit, err := repo.HasCommit(context.Background(), newCommitSHA)
+	assert.NoError(t, err)
+	assert.False(t, hasCommit, "new commit should not be in local cache yet")
+
+	// The new commit is the tip of an upstream branch, so upstream advertises it
+	exists, err := repo.CommitExistsUpstream(context.Background(), newCommitSHA)
+	assert.NoError(t, err)
+	assert.True(t, exists, "new commit should be advertised by upstream")
+
+	unknownSHA := strings.Repeat("0", len(newCommitSHA))
+	exists, err = repo.CommitExistsUpstream(context.Background(), unknownSHA)
+	assert.NoError(t, err)
+	assert.False(t, exists, "unknown commit should not be advertised by upstream")
+
+	// Now fetch the missing commit directly
+	config := Config{
+		RootDir:          tmpDir,
+		FetchInterval:    15 * time.Minute,
+		RefCheckInterval: 10 * time.Second,
+		GitConfig:        DefaultGitTuningConfig(),
+	}
+	err = repo.FetchCommit(context.Background(), newCommitSHA, config)
+	assert.NoError(t, err, "FetchCommit should fetch the missing commit")
+
+	// Verify the commit is now available locally
+	hasCommit, err = repo.HasCommit(context.Background(), newCommitSHA)
+	assert.NoError(t, err)
+	assert.True(t, hasCommit, "new commit should now be available after FetchCommit")
+}
diff --git a/internal/strategy/git/backend.go b/internal/strategy/git/backend.go
index 677a155..32c032d 100644
--- a/internal/strategy/git/backend.go
+++ b/internal/strategy/git/backend.go
@@ -85,15 +85,21 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, repo
 }
 
 func (s *Strategy) ensureRefsUpToDate(ctx context.Context, repo *gitclone.Repository) error {
-	gitcloneConfig := gitclone.Config{
+	config := s.gitcloneConfig()
+	if err := repo.EnsureRefsUpToDate(ctx, config); err != nil {
+		return errors.Wrap(err, "ensure refs up to date")
+	}
+	return nil
+}
+
+// gitcloneConfig builds the gitclone.Config used for clone and fetch calls
+// from the strategy's configuration plus the default git tuning settings.
+func (s *Strategy) gitcloneConfig() gitclone.Config {
+	return gitclone.Config{
 		RootDir:          s.config.MirrorRoot,
 		FetchInterval:    s.config.FetchInterval,
 		RefCheckInterval: s.config.RefCheckInterval,
 		CloneDepth:       s.config.CloneDepth,
 		GitConfig:        gitclone.DefaultGitTuningConfig(),
 	}
-	if err := repo.EnsureRefsUpToDate(ctx, gitcloneConfig); err != nil {
-		return errors.Wrap(err, "ensure refs up to date")
-	}
-	return nil
 }
diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go
index c1605d1..f7b3783 100644
--- a/internal/strategy/git/git.go
+++ b/internal/strategy/git/git.go
@@ -236,13 +236,7 @@ func (s *Strategy) startClone(ctx context.Context, repo *gitclone.Repository) {
 		slog.String("upstream", repo.UpstreamURL()),
 		slog.String("path", repo.Path()))
 
-	gitcloneConfig := gitclone.Config{
-		RootDir:          s.config.MirrorRoot,
-		FetchInterval:    s.config.FetchInterval,
-		RefCheckInterval: s.config.RefCheckInterval,
-		CloneDepth:       s.config.CloneDepth,
-		GitConfig:        gitclone.DefaultGitTuningConfig(),
-	}
+	gitcloneConfig := s.gitcloneConfig()
 
 	err := repo.Clone(ctx, gitcloneConfig)
 
@@ -284,13 +278,7 @@ func (s *Strategy) backgroundFetch(ctx context.Context, repo *gitclone.Repositor
 		slog.String("upstream", repo.UpstreamURL()),
 		slog.String("path", repo.Path()))
 
-	gitcloneConfig := gitclone.Config{
-		RootDir:          s.config.MirrorRoot,
-		FetchInterval:    s.config.FetchInterval,
-		RefCheckInterval: s.config.RefCheckInterval,
-		CloneDepth:       s.config.CloneDepth,
-		GitConfig:        gitclone.DefaultGitTuningConfig(),
-	}
+	gitcloneConfig := s.gitcloneConfig()
 
 	if err := repo.Fetch(ctx, gitcloneConfig); err != nil {
 		logger.ErrorContext(ctx, "Fetch failed",