diff --git a/go.mod b/go.mod index 270f666..00d68a5 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,7 @@ require ( go.opentelemetry.io/otel/metric v1.40.0 go.opentelemetry.io/otel/sdk v1.40.0 go.opentelemetry.io/otel/sdk/metric v1.40.0 + golang.org/x/mod v0.31.0 ) require ( @@ -50,7 +51,6 @@ require ( go.opentelemetry.io/proto/otlp v1.9.0 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect golang.org/x/crypto v0.46.0 // indirect - golang.org/x/mod v0.31.0 // indirect golang.org/x/net v0.48.0 // indirect golang.org/x/sys v0.40.0 // indirect golang.org/x/text v0.32.0 // indirect diff --git a/internal/gitclone/manager.go b/internal/gitclone/manager.go index 79de399..d0b8ac7 100644 --- a/internal/gitclone/manager.go +++ b/internal/gitclone/manager.go @@ -15,6 +15,23 @@ import ( "github.com/alecthomas/errors" ) +var ( + sharedManager *Manager + sharedManagerMu sync.RWMutex +) + +func SetShared(m *Manager) { + sharedManagerMu.Lock() + defer sharedManagerMu.Unlock() + sharedManager = m +} + +func GetShared() *Manager { + sharedManagerMu.RLock() + defer sharedManagerMu.RUnlock() + return sharedManager +} + type State int const ( @@ -132,6 +149,10 @@ func (m *Manager) Get(upstreamURL string) *Repository { return m.clones[upstreamURL] } +func (m *Manager) Config() Config { + return m.config +} + func (m *Manager) DiscoverExisting(_ context.Context) error { err := filepath.Walk(m.config.RootDir, func(path string, info os.FileInfo, err error) error { if err != nil { @@ -429,3 +450,13 @@ func (r *Repository) GetUpstreamRefs(ctx context.Context) (map[string]string, er return ParseGitRefs(output), nil } + +func (r *Repository) HasCommit(ctx context.Context, ref string) bool { + r.mu.RLock() + defer r.mu.RUnlock() + + // #nosec G204 - r.path and ref are controlled by us + cmd := exec.CommandContext(ctx, "git", "-C", r.path, "cat-file", "-e", ref) + err := cmd.Run() + return err == nil +} diff --git a/internal/gitclone/manager_test.go b/internal/gitclone/manager_test.go index 6ad97df..9dc86b7 100644 --- a/internal/gitclone/manager_test.go +++ b/internal/gitclone/manager_test.go @@ -3,6 +3,7 @@ package gitclone //nolint:testpackage // white-box testing required for unexport import ( "context" "os" + "os/exec" "path/filepath" "testing" "time" @@ -214,3 +215,43 @@ func TestState_String(t *testing.T) { assert.Equal(t, "cloning", StateCloning.String()) assert.Equal(t, "ready", StateReady.String()) } + +func TestRepository_HasCommit(t *testing.T) { + ctx := context.Background() + tmpDir := t.TempDir() + repoPath := filepath.Join(tmpDir, "test-repo") + + assert.NoError(t, os.MkdirAll(repoPath, 0o755)) + + cmd := exec.Command("git", "-C", repoPath, "init") + assert.NoError(t, cmd.Run()) + + cmd = exec.Command("git", "-C", repoPath, "config", "user.email", "test@example.com") + assert.NoError(t, cmd.Run()) + cmd = exec.Command("git", "-C", repoPath, "config", "user.name", "Test User") + assert.NoError(t, cmd.Run()) + + testFile := filepath.Join(repoPath, "test.txt") + assert.NoError(t, os.WriteFile(testFile, []byte("test content"), 0o644)) + cmd = exec.Command("git", "-C", repoPath, "add", "test.txt") + assert.NoError(t, cmd.Run()) + cmd = exec.Command("git", "-C", repoPath, "commit", "-m", "Initial commit") + assert.NoError(t, cmd.Run()) + + cmd = exec.Command("git", "-C", repoPath, "tag", "v1.0.0") + assert.NoError(t, cmd.Run()) + + repo := &Repository{ + state: StateReady, + path: repoPath, + upstreamURL: "https://example.com/test-repo", + fetchSem: make(chan struct{}, 1), + } + repo.fetchSem <- struct{}{} + + assert.True(t, repo.HasCommit(ctx, "HEAD")) + assert.True(t, repo.HasCommit(ctx, "v1.0.0")) + + assert.False(t, repo.HasCommit(ctx, "nonexistent")) + assert.False(t, repo.HasCommit(ctx, "v9.9.9")) +} diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index b3a35b7..4863895 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -70,6 +70,8 @@ func New(ctx context.Context, config Config, scheduler jobscheduler.Scheduler, c return nil, errors.Wrap(err, "create clone manager") } + gitclone.SetShared(cloneManager) + s := &Strategy{ config: config, cache: cache, diff --git a/internal/strategy/gomod/fetcher.go b/internal/strategy/gomod/fetcher.go new file mode 100644 index 0000000..c3d98c8 --- /dev/null +++ b/internal/strategy/gomod/fetcher.go @@ -0,0 +1,73 @@ +package gomod + +import ( + "context" + "io" + "path" + "strings" + "time" + + "github.com/alecthomas/errors" + "github.com/goproxy/goproxy" +) + +// CompositeFetcher routes module requests to either public or private fetchers based on module path patterns. +type CompositeFetcher struct { + publicFetcher goproxy.Fetcher + privateFetcher goproxy.Fetcher + patterns []string +} + +func NewCompositeFetcher( + publicFetcher goproxy.Fetcher, + privateFetcher goproxy.Fetcher, + patterns []string, +) *CompositeFetcher { + return &CompositeFetcher{ + publicFetcher: publicFetcher, + privateFetcher: privateFetcher, + patterns: patterns, + } +} + +func (c *CompositeFetcher) IsPrivate(modulePath string) bool { + for _, pattern := range c.patterns { + matched, err := path.Match(pattern, modulePath) + if err == nil && matched { + return true + } + + if strings.HasPrefix(modulePath, pattern+"/") || modulePath == pattern { + return true + } + } + + return false +} + +func (c *CompositeFetcher) Query(ctx context.Context, path, query string) (version string, t time.Time, err error) { + if c.IsPrivate(path) { + v, tm, err := c.privateFetcher.Query(ctx, path, query) + return v, tm, errors.Wrap(err, "private fetcher query") + } + v, tm, err := c.publicFetcher.Query(ctx, path, query) + return v, tm, errors.Wrap(err, "public fetcher query") +} + +func (c *CompositeFetcher) List(ctx context.Context, path string) (versions []string, err error) { + if c.IsPrivate(path) { + v, err := c.privateFetcher.List(ctx, path) + return v, errors.Wrap(err, "private fetcher list") + } + v, err := c.publicFetcher.List(ctx, path) + return v, errors.Wrap(err, "public fetcher list") +} + +func (c *CompositeFetcher) Download(ctx context.Context, path, version string) (info, mod, zip io.ReadSeekCloser, err error) { + if c.IsPrivate(path) { + i, m, z, err := c.privateFetcher.Download(ctx, path, version) + return i, m, z, errors.Wrap(err, "private fetcher download") + } + i, m, z, err := c.publicFetcher.Download(ctx, path, version) + return i, m, z, errors.Wrap(err, "public fetcher download") +} diff --git a/internal/strategy/gomod/fetcher_test.go b/internal/strategy/gomod/fetcher_test.go new file mode 100644 index 0000000..7f365cb --- /dev/null +++ b/internal/strategy/gomod/fetcher_test.go @@ -0,0 +1,123 @@ +package gomod_test + +import ( + "testing" + + "github.com/block/cachew/internal/strategy/gomod" +) + +func TestCompositeFetcher_isPrivate(t *testing.T) { + tests := []struct { + name string + patterns []string + modulePath string + want bool + }{ + { + name: "exact match single pattern", + patterns: []string{"github.com/squareup"}, + modulePath: "github.com/squareup", + want: true, + }, + { + name: "exact match with multiple patterns", + patterns: []string{"github.com/org1", "github.com/squareup", "github.com/org2"}, + modulePath: "github.com/squareup", + want: true, + }, + { + name: "prefix match - one level deep", + patterns: []string{"github.com/squareup"}, + modulePath: "github.com/squareup/repo", + want: true, + }, + { + name: "prefix match - two levels deep", + patterns: []string{"github.com/squareup"}, + modulePath: "github.com/squareup/repo/submodule", + want: true, + }, + { + name: "prefix match with multiple patterns", + patterns: []string{"github.com/org1", "github.com/squareup"}, + modulePath: "github.com/squareup/repo", + want: true, + }, + { + name: "wildcard match", + patterns: []string{"github.com/squareup/*"}, + modulePath: "github.com/squareup/repo", + want: true, + }, + { + name: "wildcard match - multiple levels", + patterns: []string{"github.com/*/*"}, + modulePath: "github.com/squareup/repo", + want: true, + }, + { + name: "no match - different org", + patterns: []string{"github.com/squareup"}, + modulePath: "github.com/other/repo", + want: false, + }, + { + name: "no match - different host", + patterns: []string{"github.com/squareup"}, + modulePath: "gitlab.com/squareup/repo", + want: false, + }, + { + name: "no match - prefix without slash", + patterns: []string{"github.com/square"}, + modulePath: "github.com/squareup/repo", + want: false, + }, + { + name: "no match - empty patterns", + patterns: []string{}, + modulePath: "github.com/squareup/repo", + want: false, + }, + { + name: "empty module path", + patterns: []string{"github.com/squareup"}, + modulePath: "", + want: false, + }, + { + name: "multiple patterns with no match", + patterns: []string{"github.com/org1", "github.com/org2", "github.com/org3"}, + modulePath: "github.com/squareup/repo", + want: false, + }, + { + name: "pattern with trailing slash", + patterns: []string{"github.com/squareup/"}, + modulePath: "github.com/squareup/repo", + want: false, + }, + { + name: "gopkg.in pattern", + patterns: []string{"gopkg.in/square"}, + modulePath: "gopkg.in/square/go-jose.v2", + want: true, + }, + { + name: "nested GitHub org pattern", + patterns: []string{"github.com/squareup/internal"}, + modulePath: "github.com/squareup/internal/auth", + want: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fetcher := gomod.NewCompositeFetcher(nil, nil, tt.patterns) + got := fetcher.IsPrivate(tt.modulePath) + if got != tt.want { + t.Errorf("IsPrivate() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/internal/strategy/gomod/gomod.go b/internal/strategy/gomod/gomod.go index a7e3fdf..36ff279 100644 --- a/internal/strategy/gomod/gomod.go +++ b/internal/strategy/gomod/gomod.go @@ -7,9 +7,11 @@ import ( "net/http" "net/url" + "github.com/alecthomas/errors" "github.com/goproxy/goproxy" "github.com/block/cachew/internal/cache" + "github.com/block/cachew/internal/gitclone" "github.com/block/cachew/internal/logging" "github.com/block/cachew/internal/strategy" ) @@ -19,15 +21,17 @@ func Register(r *strategy.Registry) { } type Config struct { - Proxy string `hcl:"proxy,optional" help:"Upstream Go module proxy URL (defaults to proxy.golang.org)" default:"https://proxy.golang.org"` + Proxy string `hcl:"proxy,optional" help:"Upstream Go module proxy URL (defaults to proxy.golang.org)" default:"https://proxy.golang.org"` + PrivatePaths []string `hcl:"private-paths,optional" help:"Module path patterns for private repositories"` } type Strategy struct { - config Config - cache cache.Cache - logger *slog.Logger - proxy *url.URL - goproxy *goproxy.Goproxy + config Config + cache cache.Cache + logger *slog.Logger + proxy *url.URL + goproxy *goproxy.Goproxy + cloneManager *gitclone.Manager } var _ strategy.Strategy = (*Strategy)(nil) @@ -45,14 +49,32 @@ func New(ctx context.Context, config Config, cache cache.Cache, mux strategy.Mux proxy: parsedURL, } - s.goproxy = &goproxy.Goproxy{ - Logger: s.logger, - Fetcher: &goproxy.GoFetcher{ - Env: []string{ - "GOPROXY=" + config.Proxy, - "GOSUMDB=off", // Disable checksum database validation in fetcher, to prevent unneccessary double validation - }, + publicFetcher := &goproxy.GoFetcher{ + Env: []string{ + "GOPROXY=" + config.Proxy, + "GOSUMDB=off", // Disable checksum database validation in fetcher, to prevent unneccessary double validation }, + } + + var fetcher goproxy.Fetcher = publicFetcher + + if len(config.PrivatePaths) > 0 { + cloneManager := gitclone.GetShared() + if cloneManager == nil { + return nil, errors.New("private-paths configured but git strategy not initialized - git strategy with mirror-root is required for private module support") + } + + s.cloneManager = cloneManager + privateFetcher := newPrivateFetcher(s.logger, cloneManager) + fetcher = NewCompositeFetcher(publicFetcher, privateFetcher, config.PrivatePaths) + + s.logger.InfoContext(ctx, "Configured private module support", + slog.Any("private-paths", config.PrivatePaths)) + } + + s.goproxy = &goproxy.Goproxy{ + Logger: s.logger, + Fetcher: fetcher, Cacher: &goproxyCacher{ cache: cache, }, diff --git a/internal/strategy/gomod/private_fetcher.go b/internal/strategy/gomod/private_fetcher.go new file mode 100644 index 0000000..c89f867 --- /dev/null +++ b/internal/strategy/gomod/private_fetcher.go @@ -0,0 +1,330 @@ +package gomod + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "io/fs" + "log/slog" + "os/exec" + "sort" + "strings" + "time" + + "github.com/alecthomas/errors" + "golang.org/x/mod/semver" + + "github.com/block/cachew/internal/gitclone" +) + +type privateFetcher struct { + logger *slog.Logger + cloneManager *gitclone.Manager +} + +type moduleInfo struct { + Version string `json:"Version"` + Time string `json:"Time"` +} + +func newPrivateFetcher(logger *slog.Logger, cloneManager *gitclone.Manager) *privateFetcher { + return &privateFetcher{ + logger: logger, + cloneManager: cloneManager, + } +} + +func (p *privateFetcher) Query(ctx context.Context, path, query string) (version string, t time.Time, err error) { + logger := p.logger.With(slog.String("module", path), slog.String("query", query)) + logger.DebugContext(ctx, "Private fetcher: Query") + + gitURL := p.modulePathToGitURL(path) + + repo, err := p.cloneManager.GetOrCreate(ctx, gitURL) + if err != nil { + return "", time.Time{}, errors.Wrapf(err, "get or create clone for %s", path) + } + + if err := p.ensureReady(ctx, repo); err != nil { + return "", time.Time{}, errors.Wrapf(err, "ensure repository ready for %s", gitURL) + } + + resolvedVersion, commitTime, err := p.resolveVersionQuery(ctx, repo, query) + if err != nil { + return "", time.Time{}, errors.Wrapf(err, "resolve version query %s", query) + } + + return resolvedVersion, commitTime, nil +} + +func (p *privateFetcher) List(ctx context.Context, path string) (versions []string, err error) { + logger := p.logger.With(slog.String("module", path)) + logger.DebugContext(ctx, "Private fetcher: List") + + gitURL := p.modulePathToGitURL(path) + repo, err := p.cloneManager.GetOrCreate(ctx, gitURL) + if err != nil { + return nil, errors.Wrapf(err, "get or create clone for %s", path) + } + + if err := p.ensureReady(ctx, repo); err != nil { + return nil, errors.Wrapf(err, "ensure repository ready for %s", gitURL) + } + + versions, err = p.listVersions(ctx, repo) + if err != nil { + return nil, errors.Wrap(err, "list versions") + } + + return versions, nil +} + +func (p *privateFetcher) Download(ctx context.Context, path, version string) (info, mod, zip io.ReadSeekCloser, err error) { + logger := p.logger.With(slog.String("module", path), slog.String("version", version)) + logger.DebugContext(ctx, "Private fetcher: Download") + + gitURL := p.modulePathToGitURL(path) + repo, err := p.cloneManager.GetOrCreate(ctx, gitURL) + if err != nil { + return nil, nil, nil, errors.Wrapf(err, "get or create clone for %s", path) + } + + if err := p.ensureReady(ctx, repo); err != nil { + return nil, nil, nil, errors.Wrapf(err, "ensure repository ready for %s", gitURL) + } + + if err := p.verifyCommitExists(ctx, repo, version); err != nil { + return nil, nil, nil, err + } + + infoReader, err := p.generateInfo(ctx, repo, version) + if err != nil { + return nil, nil, nil, errors.Wrap(err, "generate info") + } + + modReader := p.generateMod(ctx, repo, path, version) + + zipReader, err := p.generateZip(ctx, repo, path, version) + if err != nil { + _ = infoReader.Close() + _ = modReader.Close() + return nil, nil, nil, errors.Wrap(err, "generate zip") + } + + return infoReader, modReader, zipReader, nil +} + +func (p *privateFetcher) ensureReady(ctx context.Context, repo *gitclone.Repository) error { + if repo.State() == gitclone.StateReady { + return nil + } + + config := p.cloneManager.Config() + if err := repo.Clone(ctx, config); err != nil { + return errors.Wrap(err, "clone repository") + } + + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + timeout := time.After(30 * time.Minute) // reasonable timeout for cloning + + for { + if repo.State() == gitclone.StateReady { + return nil + } + + select { + case <-ticker.C: + // Continue polling + case <-timeout: + return errors.Errorf("timeout waiting for repository %s to be ready", repo.UpstreamURL()) + case <-ctx.Done(): + return errors.Wrap(ctx.Err(), "context cancelled while waiting for clone") + } + } +} + +func (p *privateFetcher) modulePathToGitURL(modulePath string) string { + return "https://" + modulePath +} + +func (p *privateFetcher) verifyCommitExists(ctx context.Context, repo *gitclone.Repository, ref string) error { + if !repo.HasCommit(ctx, ref) { + return errors.Errorf("commit %s not found in repository %s", ref, repo.UpstreamURL()) + } + return nil +} + +func (p *privateFetcher) resolveVersionQuery(ctx context.Context, repo *gitclone.Repository, query string) (string, time.Time, error) { + if query == "latest" { + versions, err := p.listVersions(ctx, repo) + if err != nil || len(versions) == 0 { + return p.getDefaultBranchVersion(ctx, repo) + } + + latestVersion := versions[len(versions)-1] + commitTime, err := p.getCommitTime(ctx, repo, latestVersion) + if err != nil { + return "", time.Time{}, err + } + return latestVersion, commitTime, nil + } + + if semver.IsValid(query) { + commitTime, err := p.getCommitTime(ctx, repo, query) + if err != nil { + return "", time.Time{}, fs.ErrNotExist + } + return query, commitTime, nil + } + + return p.getDefaultBranchVersion(ctx, repo) +} + +func (p *privateFetcher) listVersions(ctx context.Context, repo *gitclone.Repository) ([]string, error) { + var output []byte + var err error + + repo.WithReadLock(func() { + // #nosec G204 - repo.Path() is controlled by us + cmd := exec.CommandContext(ctx, "git", "-C", repo.Path(), "tag", "-l", "v*") + output, err = cmd.CombinedOutput() + }) + + if err != nil { + return nil, errors.Wrapf(err, "git tag failed: %s", string(output)) + } + + var versions []string + for line := range strings.Lines(string(output)) { + line = strings.TrimSpace(line) + if line != "" && semver.IsValid(line) { + versions = append(versions, line) + } + } + + sort.Slice(versions, func(i, j int) bool { + return semver.Compare(versions[i], versions[j]) < 0 + }) + + return versions, nil +} + +func (p *privateFetcher) getCommitTime(ctx context.Context, repo *gitclone.Repository, ref string) (time.Time, error) { + var output []byte + var err error + + repo.WithReadLock(func() { + // #nosec G204 - repo.Path() and ref are controlled by us + cmd := exec.CommandContext(ctx, "git", "-C", repo.Path(), "log", "-1", "--format=%cI", ref) + output, err = cmd.CombinedOutput() + }) + + if err != nil { + return time.Time{}, errors.Wrapf(err, "git log failed: %s", string(output)) + } + + timeStr := strings.TrimSpace(string(output)) + t, err := time.Parse(time.RFC3339, timeStr) + return t, errors.Wrap(err, "parse commit time") +} + +func (p *privateFetcher) getDefaultBranchVersion(ctx context.Context, repo *gitclone.Repository) (string, time.Time, error) { + var output []byte + var err error + + repo.WithReadLock(func() { + // #nosec G204 - repo.Path() is controlled by us + cmd := exec.CommandContext(ctx, "git", "-C", repo.Path(), "rev-parse", "HEAD") + output, err = cmd.CombinedOutput() + }) + + if err != nil { + return "", time.Time{}, errors.Wrapf(err, "git rev-parse failed: %s", string(output)) + } + + commitHash := strings.TrimSpace(string(output)) + commitTime, err := p.getCommitTime(ctx, repo, "HEAD") + if err != nil { + return "", time.Time{}, err + } + + pseudoVersion := fmt.Sprintf("v0.0.0-%s-%s", + commitTime.UTC().Format("20060102150405"), + commitHash[:12]) + + return pseudoVersion, commitTime, nil +} + +func (p *privateFetcher) generateInfo(ctx context.Context, repo *gitclone.Repository, version string) (io.ReadSeekCloser, error) { + commitTime, err := p.getCommitTime(ctx, repo, version) + if err != nil { + return nil, err + } + + info := moduleInfo{ + Version: version, + Time: commitTime.Format(time.RFC3339), + } + + data, err := json.Marshal(info) + if err != nil { + return nil, errors.Wrap(err, "marshal module info") + } + + return newReadSeekCloser(bytes.NewReader(data)), nil +} + +func (p *privateFetcher) generateMod(ctx context.Context, repo *gitclone.Repository, modulePath, version string) io.ReadSeekCloser { + var output []byte + var err error + + repo.WithReadLock(func() { + // #nosec G204 - version and repo.Path() are controlled by this package, not user input + cmd := exec.CommandContext(ctx, "git", "-C", repo.Path(), "show", fmt.Sprintf("%s:go.mod", version)) + output, err = cmd.CombinedOutput() + }) + + if err != nil { + minimal := fmt.Sprintf("module %s\n\ngo 1.21\n", modulePath) + return newReadSeekCloser(bytes.NewReader([]byte(minimal))) + } + + return newReadSeekCloser(bytes.NewReader(output)) +} + +func (p *privateFetcher) generateZip(ctx context.Context, repo *gitclone.Repository, modulePath, version string) (io.ReadSeekCloser, error) { + prefix := fmt.Sprintf("%s@%s/", modulePath, version) + var output []byte + var err error + + repo.WithReadLock(func() { + // #nosec G204 - version and repo.Path() are controlled by this package, not user input + cmd := exec.CommandContext(ctx, "git", "-C", repo.Path(), "archive", + "--format=zip", + fmt.Sprintf("--prefix=%s", prefix), + version) + output, err = cmd.CombinedOutput() + }) + + if err != nil { + return nil, errors.Wrapf(err, "git archive failed: %s", string(output)) + } + + return newReadSeekCloser(bytes.NewReader(output)), nil +} + +type readSeekCloser struct { + *bytes.Reader +} + +func newReadSeekCloser(r *bytes.Reader) io.ReadSeekCloser { + return &readSeekCloser{Reader: r} +} + +func (r *readSeekCloser) Close() error { + return nil +}