diff --git a/README.md b/README.md index f48fbfb..399b199 100644 --- a/README.md +++ b/README.md @@ -426,6 +426,24 @@ echo "gruntwork-io/terragrunt gruntwork-io/terratest" | git-xargs \ "$(pwd)/scripts/update-copyright-year.sh" ``` +### Option #5: Select repos via the GitHub Search API + +The repository scope can be narrowed down by using the GitHub Search API. This allows you to select repositories based on various criteria, such as language, topics, other metadata, or file content. + +- `--github-repository-search` to select repositories based on the [GitHub Repository Search API](https://docs.github.com/en/search-github/searching-on-github/searching-for-repositories) +- `--github-code-search` to select repositories based on the [GitHub Code Search API](https://docs.github.com/en/search-github/github-code-search/understanding-github-code-search-syntax) +- `--github-org` can still be used; it appends an `org:<your-github-org>` qualifier to the search query + +If both `--github-repository-search` and `--github-code-search` are provided, only repositories that match both queries are selected. + +``` +git-xargs \ + --github-org <your-github-org> \ + --github-repository-search "is:private language:go" \ + --github-code-search "filename:Dockerfile ubuntu" \ + "$(pwd)/scripts/update-copyright-year.sh" +``` + ## Notable flags `git-xargs` exposes several flags that allow you to customize its behavior to better suit your needs. For the latest info on flags, you should run `git-xargs --help`. 
However, a couple of the flags are worth explaining more in depth here: diff --git a/auth/auth.go b/auth/auth.go index 83b1be4..b8a10dd 100644 --- a/auth/auth.go +++ b/auth/auth.go @@ -24,6 +24,12 @@ type githubRepositoriesService interface { ListByOrg(ctx context.Context, org string, opts *github.RepositoryListByOrgOptions) ([]*github.Repository, *github.Response, error) } +// The go-github package satisfies this Search service's interface in production +type githubSearchService interface { + Repositories(ctx context.Context, query string, opts *github.SearchOptions) (*github.RepositoriesSearchResult, *github.Response, error) + Code(ctx context.Context, query string, opts *github.SearchOptions) (*github.CodeSearchResult, *github.Response, error) +} + // GithubClient is the data structure that is common between production code and test code. In production code, // go-github satisfies the PullRequests and Repositories service interfaces, whereas in test the concrete // implementations for these same services are mocks that return a static slice of pointers to GitHub repositories, @@ -32,12 +38,14 @@ type githubRepositoriesService interface { type GithubClient struct { PullRequests githubPullRequestService Repositories githubRepositoriesService + Search githubSearchService } func NewClient(client *github.Client) GithubClient { return GithubClient{ PullRequests: client.PullRequests, Repositories: client.Repositories, + Search: client.Search, } } diff --git a/cmd/git-xargs.go b/cmd/git-xargs.go index 2286ecc..369e18b 100644 --- a/cmd/git-xargs.go +++ b/cmd/git-xargs.go @@ -36,6 +36,8 @@ func parseGitXargsConfig(c *cli.Context) (*config.GitXargsConfig, error) { config.TeamReviewers = c.StringSlice("team-reviewers") config.ReposFile = c.String("repos") config.GithubOrg = c.String("github-org") + config.GithubRepositorySearch = c.String("github-repository-search") + config.GithubCodeSearch = c.String("github-code-search") config.RepoSlice = c.StringSlice("repo") 
config.MaxConcurrentRepos = c.Int("max-concurrent-repos") config.SecondsToSleepBetweenPRs = c.Int("seconds-between-prs") diff --git a/common/common.go b/common/common.go index e731735..5feb45d 100644 --- a/common/common.go +++ b/common/common.go @@ -30,6 +30,8 @@ const ( DefaultSecondsBetweenPRs = 1 DefaultMaxPullRequestRetries = 3 DefaultSecondsToWaitWhenRateLimited = 60 + GithubRepositorySearchFlagName = "github-repository-search" + GithubCodeSearchFlagName = "github-code-search" ) var ( @@ -120,4 +122,12 @@ var ( Name: KeepClonedRepositoriesFlagName, Usage: "By default, git-xargs deletes the cloned repositories from the temp directory after the command has finished running, to save space on your machine. Pass this flag to prevent git-xargs from deleting the cloned repositories.", } + GenericGithubRepositorySearchFlag = cli.StringFlag{ + Name: GithubRepositorySearchFlagName, + Usage: "GitHub repository search query to find repositories (e.g., 'language:go', 'is:private', 'topic:docker'). See GitHub repository search syntax for more options.", + } + GenericGithubCodeSearchFlag = cli.StringFlag{ + Name: GithubCodeSearchFlagName, + Usage: "GitHub code search query to find repositories containing matching code (e.g., 'path:Dockerfile', 'filename:package.json', 'extension:py print'). Repositories will be extracted from code search results. 
See GitHub code search syntax for more options.", + } ) diff --git a/config/config.go b/config/config.go index 34b7858..beccf8b 100644 --- a/config/config.go +++ b/config/config.go @@ -28,6 +28,8 @@ type GitXargsConfig struct { TeamReviewers []string ReposFile string GithubOrg string + GithubRepositorySearch string + GithubCodeSearch string RepoSlice []string RepoFromStdIn []string Args []string @@ -61,6 +63,8 @@ func NewGitXargsConfig() *GitXargsConfig { TeamReviewers: []string{}, ReposFile: "", GithubOrg: "", + GithubRepositorySearch: "", + GithubCodeSearch: "", RepoSlice: []string{}, RepoFromStdIn: []string{}, Args: []string{}, diff --git a/io/validate-input.go b/io/validate-input.go index 6c722d6..694d748 100644 --- a/io/validate-input.go +++ b/io/validate-input.go @@ -8,7 +8,8 @@ import ( // EnsureValidOptionsPassed checks that user has provided one valid method for selecting repos to operate on func EnsureValidOptionsPassed(config *config.GitXargsConfig) error { - if len(config.RepoSlice) < 1 && config.ReposFile == "" && config.GithubOrg == "" && len(config.RepoFromStdIn) == 0 { + if len(config.RepoSlice) < 1 && config.ReposFile == "" && config.GithubOrg == "" && + config.GithubRepositorySearch == "" && config.GithubCodeSearch == "" && len(config.RepoFromStdIn) == 0 { return errors.WithStackTrace(types.NoRepoSelectionsMadeErr{}) } if config.BranchName == "" { diff --git a/main.go b/main.go index 7f4f656..5fbcbaa 100644 --- a/main.go +++ b/main.go @@ -79,6 +79,8 @@ func setupApp() *cli.App { common.GenericMaxConcurrentClonesFlag, common.GenericNoSkipCIFlag, common.GenericKeepClonedRepositoriesFlag, + common.GenericGithubRepositorySearchFlag, + common.GenericGithubCodeSearchFlag, } app.Action = cmd.RunGitXargs diff --git a/repository/fetch-repos.go b/repository/fetch-repos.go index bcf7596..8b2459e 100644 --- a/repository/fetch-repos.go +++ b/repository/fetch-repos.go @@ -3,6 +3,7 @@ package repository import ( "context" "fmt" + "strings" 
"github.com/gruntwork-io/git-xargs/auth" "github.com/gruntwork-io/git-xargs/config" @@ -131,3 +132,310 @@ func getReposByOrg(config *config.GitXargsConfig) ([]*github.Repository, error) return allRepos, nil } + +// getReposBySearch uses GitHub's search API to find repositories matching the given query +func getReposBySearch(config *config.GitXargsConfig) ([]*github.Repository, error) { + logger := logging.GetLogger("git-xargs") + + // Handle different search scenarios + if config.GithubRepositorySearch != "" && config.GithubCodeSearch != "" { + // Both searches provided - return intersection + logger.Debug("Both repository and code search queries provided, finding intersection") + return getReposByIntersection(config) + } else if config.GithubRepositorySearch != "" { + // Only repository search + return getReposByRepositorySearch(config) + } else if config.GithubCodeSearch != "" { + // Only code search + return getReposByCodeSearch(config) + } + + return nil, errors.WithStackTrace(types.NoGithubSearchQuerySuppliedErr{}) +} + +// getReposByIntersection finds repositories that match both repository and code search queries +func getReposByIntersection(config *config.GitXargsConfig) ([]*github.Repository, error) { + logger := logging.GetLogger("git-xargs") + + // Get repositories from repository search + repoSearchRepos, err := getReposByRepositorySearch(config) + if err != nil { + return nil, err + } + + // Get repositories from code search + codeSearchRepos, err := getReposByCodeSearch(config) + if err != nil { + return nil, err + } + + // Find intersection + repoMap := make(map[string]*github.Repository) + for _, repo := range repoSearchRepos { + repoMap[repo.GetFullName()] = repo + } + + var intersectionRepos []*github.Repository + for _, repo := range codeSearchRepos { + if _, found := repoMap[repo.GetFullName()]; found { + intersectionRepos = append(intersectionRepos, repo) + } + } + + repoCount := len(intersectionRepos) + if repoCount == 0 { + return nil, 
errors.WithStackTrace(types.NoReposFoundFromSearchErr{ + Query: fmt.Sprintf("intersection of repository search '%s' and code search '%s'", + config.GithubRepositorySearch, config.GithubCodeSearch), + }) + } + + logger.WithFields(logrus.Fields{ + "Repo count": repoCount, + "Repository Query": config.GithubRepositorySearch, + "Code Query": config.GithubCodeSearch, + }).Debug("Found intersection of repository and code search results") + + config.Stats.TrackMultiple(stats.FetchedViaGithubAPI, intersectionRepos) + + return intersectionRepos, nil +} + +// getReposByRepositorySearch uses GitHub's repository search API to find repositories matching the given query +func getReposByRepositorySearch(config *config.GitXargsConfig) ([]*github.Repository, error) { + logger := logging.GetLogger("git-xargs") + + var allRepos []*github.Repository + + if config.GithubRepositorySearch == "" { + return nil, errors.WithStackTrace(types.NoGithubRepositorySearchQuerySuppliedErr{}) + } + + // Build the search query + searchQuery := config.GithubRepositorySearch + + // If a specific organization is provided, add it to the query + if config.GithubOrg != "" { + searchQuery = fmt.Sprintf("%s org:%s", searchQuery, config.GithubOrg) + } + + logger.WithFields(logrus.Fields{ + "Query": searchQuery, + }).Debug("Searching for repositories using GitHub Repository Search API") + + opt := &github.SearchOptions{ + ListOptions: github.ListOptions{ + PerPage: 100, + }, + } + + for { + var reposToAdd []*github.Repository + result, resp, err := config.GithubClient.Search.Repositories(context.Background(), searchQuery, opt) + if err != nil { + return allRepos, errors.WithStackTrace(err) + } + + repos := result.Repositories + + // Filter out archived repos if --skip-archived-repos is passed + if config.SkipArchivedRepos { + for _, repo := range repos { + if repo.GetArchived() { + logger.WithFields(logrus.Fields{ + "Name": repo.GetFullName(), + }).Debug("Skipping archived repository from search results") + + 
// Track repos to skip because of archived status for our final run report + config.Stats.TrackSingle(stats.ReposArchivedSkipped, repo) + } else { + // Ensure complete repository data before adding + completeRepo, err := ensureCompleteRepositoryData(config, repo) + if err != nil { + logger.WithFields(logrus.Fields{ + "Repo": repo.GetFullName(), + "Error": err, + }).Debug("Error fetching complete repository data") + // Continue with original repo data + completeRepo = repo + } + reposToAdd = append(reposToAdd, completeRepo) + } + } + } else { + // Ensure complete repository data for all repos + for _, repo := range repos { + completeRepo, err := ensureCompleteRepositoryData(config, repo) + if err != nil { + logger.WithFields(logrus.Fields{ + "Repo": repo.GetFullName(), + "Error": err, + }).Debug("Error fetching complete repository data") + // Continue with original repo data + completeRepo = repo + } + reposToAdd = append(reposToAdd, completeRepo) + } + } + + allRepos = append(allRepos, reposToAdd...) 
+ + if resp.NextPage == 0 { + break + } + opt.Page = resp.NextPage + } + + repoCount := len(allRepos) + + if repoCount == 0 { + return nil, errors.WithStackTrace(types.NoReposFoundFromSearchErr{Query: searchQuery}) + } + + logger.WithFields(logrus.Fields{ + "Repo count": repoCount, + "Query": searchQuery, + }).Debug("Fetched repos from GitHub Repository Search API") + + config.Stats.TrackMultiple(stats.FetchedViaGithubAPI, allRepos) + + return allRepos, nil +} + +// getReposByCodeSearch uses GitHub's code search API to find repositories containing matching code +func getReposByCodeSearch(config *config.GitXargsConfig) ([]*github.Repository, error) { + logger := logging.GetLogger("git-xargs") + + var allRepos []*github.Repository + repoMap := make(map[string]*github.Repository) // To avoid duplicates + + if config.GithubCodeSearch == "" { + return allRepos, errors.WithStackTrace(types.NoGithubCodeSearchQuerySuppliedErr{}) + } + + // Build the search query + searchQuery := config.GithubCodeSearch + + // If a specific organization is provided, add it to the query + if config.GithubOrg != "" { + searchQuery = fmt.Sprintf("%s org:%s", searchQuery, config.GithubOrg) + } + + logger.WithFields(logrus.Fields{ + "Query": searchQuery, + }).Debug("Searching for code using GitHub Code Search API") + + opt := &github.SearchOptions{ + ListOptions: github.ListOptions{ + PerPage: 100, + }, + } + + for { + result, resp, err := config.GithubClient.Search.Code(context.Background(), searchQuery, opt) + if err != nil { + return allRepos, errors.WithStackTrace(err) + } + + // Extract unique repositories from code search results + for _, codeResult := range result.CodeResults { + repo := codeResult.Repository + if repo != nil { + // Key by full name so that each repository is kept only once + repoMap[repo.GetFullName()] = repo + } + } + + if resp.NextPage == 0 { + break + } + opt.Page = resp.NextPage + }
+ + // Convert map to slice and ensure complete repository data + for _, repo := range repoMap { + completeRepo, err := ensureCompleteRepositoryData(config, repo) + if err != nil { + logger.WithFields(logrus.Fields{ + "Repo": repo.GetFullName(), + "Error": err, + }).Debug("Error fetching complete repository data") + // Continue with original repo data + completeRepo = repo + } + + // Code search results typically carry only a minimal repository object, so the archived + // flag is not reliable until the complete repository data has been fetched above. + // Skip archived repos if --skip-archived-repos is passed + if config.SkipArchivedRepos && completeRepo.GetArchived() { + logger.WithFields(logrus.Fields{ + "Name": completeRepo.GetFullName(), + }).Debug("Skipping archived repository from code search results") + + // Track repos to skip because of archived status for our final run report + config.Stats.TrackSingle(stats.ReposArchivedSkipped, completeRepo) + continue + } + allRepos = append(allRepos, completeRepo) + } + + repoCount := len(allRepos) + + if repoCount == 0 { + return nil, errors.WithStackTrace(types.NoReposFoundFromSearchErr{Query: searchQuery}) + } + + logger.WithFields(logrus.Fields{ + "Repo count": repoCount, + "Query": searchQuery, + }).Debug("Fetched repos from GitHub Code Search API") + + config.Stats.TrackMultiple(stats.FetchedViaGithubAPI, allRepos) + + return allRepos, nil +} + +// ensureCompleteRepositoryData fetches complete repository information using GitHub's Get Repository API +// This is needed because search results may not include all fields (like CloneURL) that are required +func ensureCompleteRepositoryData(config *config.GitXargsConfig, repo *github.Repository) (*github.Repository, error) { + if repo == nil || repo.FullName == nil { + return repo, nil + } + + // Check if we already have the essential fields - if so, no need to make additional API call + if repo.CloneURL != nil && repo.HTMLURL != nil && repo.URL != nil { + return repo, nil + } + + logger := logging.GetLogger("git-xargs") + + // Parse owner and repo name from FullName + parts := strings.Split(*repo.FullName, "/") + if len(parts) != 2 { + logger.WithFields(logrus.Fields{ + "FullName": *repo.FullName, + }).Debug("Invalid repository FullName format, 
skipping complete data fetch") + return repo, nil + } + + owner := parts[0] + repoName := parts[1] + + // Fetch complete repository data + completeRepo, _, err := config.GithubClient.Repositories.Get(context.Background(), owner, repoName) + if err != nil { + logger.WithFields(logrus.Fields{ + "Owner": owner, + "Repo": repoName, + "Error": err, + }).Debug("Failed to fetch complete repository data, using search result as-is") + return repo, nil // Return original repo if we can't fetch complete data + } + + logger.WithFields(logrus.Fields{ + "Owner": owner, + "Repo": repoName, + }).Debug("Fetched complete repository data via Get Repository API") + + return completeRepo, nil +} diff --git a/repository/select-repos.go b/repository/select-repos.go index e24513c..72a544e 100644 --- a/repository/select-repos.go +++ b/repository/select-repos.go @@ -21,15 +21,30 @@ const ( ExplicitReposOnCommandLine RepoSelectionCriteria = "repo-flag" ReposFilePath RepoSelectionCriteria = "repos-file" GithubOrganization RepoSelectionCriteria = "github-org" + GithubRepositorySearch RepoSelectionCriteria = "github-repository-search" + GithubCodeSearch RepoSelectionCriteria = "github-code-search" + GithubCombinedSearch RepoSelectionCriteria = "github-combined-search" ) // getPreferredOrderOfRepoSelections codifies the order in which flags will be preferred when the user supplied more // than one: -// 1. --github-org is a string representing the GitHub org to page through via API for all repos. -// 2. --repos is a string representing a filepath to a repos file -// 3. --repo is a string slice flag that can be called multiple times -// 4. stdin allows you to pipe repos in from other CLI tools +// 1. Both --github-repository-search and --github-code-search (intersection of both) +// 2. --github-repository-search for repository-based searches +// 3. --github-code-search for code-based searches +// 4. --github-org is a string representing the GitHub org to page through via API for all repos. +// 5. 
--repos is a string representing a filepath to a repos file +// 6. --repo is a string slice flag that can be called multiple times +// 7. stdin allows you to pipe repos in from other CLI tools func getPreferredOrderOfRepoSelections(config *config.GitXargsConfig) RepoSelectionCriteria { + if config.GithubRepositorySearch != "" && config.GithubCodeSearch != "" { + return GithubCombinedSearch + } + if config.GithubRepositorySearch != "" { + return GithubRepositorySearch + } + if config.GithubCodeSearch != "" { + return GithubCodeSearch + } if config.GithubOrg != "" { return GithubOrganization } @@ -72,6 +87,33 @@ func selectReposViaInput(config *config.GitXargsConfig) (*RepoSelection, error) GithubOrganizationName: config.GithubOrg, } switch getPreferredOrderOfRepoSelections(config) { + case GithubRepositorySearch: + config.Stats.SetSelectionMode(string(GithubRepositorySearch)) + + return &RepoSelection{ + SelectionType: GithubRepositorySearch, + AllowedRepos: []*types.AllowedRepo{}, + GithubOrganizationName: "", + }, nil + + case GithubCodeSearch: + config.Stats.SetSelectionMode(string(GithubCodeSearch)) + + return &RepoSelection{ + SelectionType: GithubCodeSearch, + AllowedRepos: []*types.AllowedRepo{}, + GithubOrganizationName: "", + }, nil + + case GithubCombinedSearch: + config.Stats.SetSelectionMode(string(GithubCombinedSearch)) + + return &RepoSelection{ + SelectionType: GithubCombinedSearch, + AllowedRepos: []*types.AllowedRepo{}, + GithubOrganizationName: "", + }, nil + case ExplicitReposOnCommandLine: config.Stats.SetSelectionMode(string(ExplicitReposOnCommandLine)) @@ -200,6 +242,51 @@ func OperateOnRepos(config *config.GitXargsConfig) error { switch repoSelection.GetCriteria() { + case GithubRepositorySearch: + // If githubRepositorySearch is set, use the GitHub Search API to find matching repositories + reposFetchedFromSearch, err := getReposBySearch(config) + if err != nil { + logger.WithFields(logrus.Fields{ + "Error": err, + "Query": 
config.GithubRepositorySearch, + }).Debug("Failure searching for repos using GitHub Search API") + return err + } + // We gather all the repos by searching them from the GitHub API + reposToIterate = reposFetchedFromSearch + + logger.Debugf("Using GitHub repository search query: %s as source of repositories. Searching through GitHub API for repos.", config.GithubRepositorySearch) + + case GithubCodeSearch: + // If githubCodeSearch is set, use the GitHub Search API to find matching repositories + reposFetchedFromSearch, err := getReposBySearch(config) + if err != nil { + logger.WithFields(logrus.Fields{ + "Error": err, + "Query": config.GithubCodeSearch, + }).Debug("Failure searching for repos using GitHub Search API") + return err + } + // We gather all the repos by searching them from the GitHub API + reposToIterate = reposFetchedFromSearch + + logger.Debugf("Using GitHub code search query: %s as source of repositories. Searching through GitHub API for repos.", config.GithubCodeSearch) + + case GithubCombinedSearch: + // If githubCombinedSearch is set, use the GitHub Search API to find matching repositories + reposFetchedFromSearch, err := getReposBySearch(config) + if err != nil { + logger.WithFields(logrus.Fields{ + "Error": err, + "Query": config.GithubRepositorySearch + " " + config.GithubCodeSearch, + }).Debug("Failure searching for repos using GitHub Search API") + return err + } + // We gather all the repos by searching them from the GitHub API + reposToIterate = reposFetchedFromSearch + + logger.Debugf("Using GitHub combined search query: %s as source of repositories. 
Searching through GitHub API for repos.", config.GithubRepositorySearch+" "+config.GithubCodeSearch) + + case GithubOrganization: // If githubOrganization is set, the user did not provide a flat file or explicit repos via the -repo(s) flags, so we're just looking up all the GitHub // repos via their Organization name via the GitHub API diff --git a/types/types.go b/types/types.go index 88d61a3..dee2de9 100644 --- a/types/types.go +++ b/types/types.go @@ -67,7 +67,7 @@ func (NoGithubOrgSuppliedErr) Error() string { type NoRepoSelectionsMadeErr struct{} func (NoRepoSelectionsMadeErr) Error() string { - return fmt.Sprint("You must target some repos for processing either via stdin or by providing one of the --github-org, --repos, or --repo flags") + return fmt.Sprint("You must target some repos for processing either via stdin or by providing one of the --github-repository-search, --github-code-search, --github-org, --repos, or --repo flags") } type NoRepoFlagTargetsValid struct{} @@ -107,3 +107,29 @@ type NoGithubOauthTokenProvidedErr struct{} func (NoGithubOauthTokenProvidedErr) Error() string { return fmt.Sprintf("You must export a valid Github personal access token as GITHUB_OAUTH_TOKEN") } + +type NoGithubSearchQuerySuppliedErr struct{} + +func (NoGithubSearchQuerySuppliedErr) Error() string { + return fmt.Sprint("You must pass a valid GitHub search query via the --github-repository-search or --github-code-search flag") +} + +type NoReposFoundFromSearchErr struct { + Query string +} + +func (err NoReposFoundFromSearchErr) Error() string { + return fmt.Sprintf("No repos found for the search query: %s", err.Query) +} + +type NoGithubRepositorySearchQuerySuppliedErr struct{} + +func (NoGithubRepositorySearchQuerySuppliedErr) Error() string { + return fmt.Sprint("You must pass a valid GitHub repository search query via the --github-repository-search flag") +} + +type NoGithubCodeSearchQuerySuppliedErr struct{} + +func (NoGithubCodeSearchQuerySuppliedErr) Error() string { + return fmt.Sprint("You must pass 
a valid GitHub code search query via the --github-code-search flag") +}