diff --git a/.github/workflows/check-untracked-repos.yml b/.github/workflows/check-untracked-repos.yml
new file mode 100644
index 000000000..85c37d027
--- /dev/null
+++ b/.github/workflows/check-untracked-repos.yml
@@ -0,0 +1,33 @@
+name: Check Untracked Repositories
+
+on:
+  schedule:
+    # Every six hours, on the hour (UTC).
+    - cron: '0 */6 * * *'
+  workflow_dispatch:
+
+# Read-only access is enough: the job only builds the tool and queries the GitHub API.
+permissions:
+  contents: read
+
+jobs:
+  check-untracked-repos:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          persist-credentials: false
+
+      - name: Install Rust stable
+        uses: ./.github/actions/setup-rust
+
+      - name: Check for untracked repositories
+        shell: bash
+        env:
+          # NOTE(review): assumes GitHubApi::new() picks up GITHUB_TOKEN from the
+          # environment; authenticated requests avoid the low anonymous rate limit
+          # shared by hosted runners. Confirm against src/api/github.rs.
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          cargo build --release
+          ./target/release/rust-team ci check-untracked-repos
diff --git a/src/api/github.rs b/src/api/github.rs
index 8945ebb85..859f39fad 100644
--- a/src/api/github.rs
+++ b/src/api/github.rs
@@ -116,6 +116,23 @@ impl GitHubApi {
             .json()?)
     }
 
+    /// Sends a GET request to `url` and deserializes the JSON response body into `T`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the request cannot be prepared or sent, if the server answers with
+    /// an error status, or if the body cannot be deserialized into `T`.
+    pub(crate) fn get<T>(&self, url: &str) -> Result<T, Error>
+    where
+        T: serde::de::DeserializeOwned,
+    {
+        Ok(self
+            .prepare(false, Method::GET, url)?
+            .send()?
+            .error_for_status()?
+            .json()?)
+    }
+
     pub(crate) fn usernames(&self, ids: &[u64]) -> Result<HashMap<u64, String>, Error> {
         #[derive(serde::Deserialize)]
         #[serde(rename_all = "camelCase")]
diff --git a/src/ci.rs b/src/ci.rs
index 4ae611b64..c27f2c53f 100644
--- a/src/ci.rs
+++ b/src/ci.rs
@@ -1,7 +1,8 @@
 use crate::data::Data;
 use crate::schema::RepoPermission;
-use anyhow::Context;
-use std::collections::BTreeSet;
+use anyhow::{bail, Context};
+use log::{debug, info, warn};
+use std::collections::{BTreeSet, HashSet};
 use std::path::{Path, PathBuf};
 
 /// Generates the contents of `.github/CODEOWNERS`, based on
@@ -170,3 +171,152 @@ fn codeowners_path() -> PathBuf {
         .join(".github")
         .join("CODEOWNERS")
 }
+
+/// Number of repositories requested per page from the GitHub REST API.
+const REPOS_PER_PAGE: usize = 100;
+
+/// The subset of GitHub's repository JSON that this check reads.
+#[derive(Debug, serde::Deserialize)]
+struct GitHubRepo {
+    name: String,
+    fork: bool,
+}
+
+/// A repository that exists on GitHub but has no entry in the team data.
+#[derive(Debug)]
+struct UntrackedRepo {
+    org: String,
+    name: String,
+}
+
+/// Check for untracked repositories and fail if any are found.
+///
+/// Every organization returned by `allowed_github_orgs()` is scanned, except
+/// those also listed in `independent_github_orgs()`.
+///
+/// # Errors
+///
+/// Fails if a GitHub request fails, or if at least one non-fork repository has
+/// no matching entry in `data.all_repos()`.
+pub fn check_untracked_repos(data: &Data) -> anyhow::Result<()> {
+    let github = crate::api::github::GitHubApi::new();
+    let config = data.config();
+
+    // Independent orgs are left out of the check.
+    let independent_orgs = config.independent_github_orgs();
+    let orgs_to_monitor: Vec<&str> = config
+        .allowed_github_orgs()
+        .iter()
+        .filter(|org| !independent_orgs.contains(org.as_str()))
+        .map(|org| org.as_str())
+        .collect();
+
+    info!(
+        "🔍 Checking for untracked repositories in organizations: {}",
+        orgs_to_monitor.join(", ")
+    );
+
+    info!("Fetching repositories from GitHub...");
+    let github_repos = fetch_all_github_repos(&github, &orgs_to_monitor)?;
+    info!(
+        "Found {} total repositories in GitHub organizations",
+        github_repos.len()
+    );
+
+    info!("Parsing local TOML files...");
+    let tracked_repos = parse_tracked_repos(data);
+    info!(
+        "Found {} tracked repositories in repos/ directory",
+        tracked_repos.len()
+    );
+
+    info!("Comparing GitHub repos with tracked repos...");
+    let untracked = find_untracked_repos(&github_repos, &tracked_repos);
+
+    if untracked.is_empty() {
+        info!("✅ All repositories are tracked!");
+        return Ok(());
+    }
+
+    warn!("❌ Found {} untracked repositories:", untracked.len());
+    for repo in &untracked {
+        warn!(" - {}/{}", repo.org, repo.name);
+    }
+
+    bail!(
+        "Found {} untracked repositories. Please add them to the repos/ directory.",
+        untracked.len()
+    );
+}
+
+/// Fetches every repository of each organization in `orgs_to_monitor`.
+///
+/// Returns `(organization, repository)` pairs in the order they were received.
+///
+/// # Errors
+///
+/// Fails on the first request that errors, naming the affected organization.
+fn fetch_all_github_repos(
+    github: &crate::api::github::GitHubApi,
+    orgs_to_monitor: &[&str],
+) -> anyhow::Result<Vec<(String, GitHubRepo)>> {
+    let mut all_repos = Vec::new();
+
+    for org in orgs_to_monitor {
+        debug!("Fetching repos for org: {}", org);
+
+        for page in 1.. {
+            let url = format!(
+                "orgs/{}/repos?per_page={}&page={}",
+                org, REPOS_PER_PAGE, page
+            );
+
+            let repos: Vec<GitHubRepo> = github
+                .get(&url)
+                .with_context(|| format!("Failed to fetch repos for org: {}", org))?;
+
+            let fetched = repos.len();
+            all_repos.extend(repos.into_iter().map(|repo| (org.to_string(), repo)));
+
+            // A page that is not full is the last one; stopping here saves the
+            // request that would only return an empty list.
+            if fetched < REPOS_PER_PAGE {
+                break;
+            }
+        }
+    }
+
+    Ok(all_repos)
+}
+
+/// Collects the `(org, name)` pair of every repository in `data.all_repos()`.
+fn parse_tracked_repos(data: &Data) -> HashSet<(String, String)> {
+    data.all_repos()
+        .map(|repo| (repo.org.clone(), repo.name.clone()))
+        .collect()
+}
+
+/// Returns the GitHub repositories that are neither forks nor present in
+/// `tracked_repos`. Organization and repository names are compared exactly.
+fn find_untracked_repos(
+    github_repos: &[(String, GitHubRepo)],
+    tracked_repos: &HashSet<(String, String)>,
+) -> Vec<UntrackedRepo> {
+    github_repos
+        .iter()
+        .filter(|(org, repo)| {
+            // Skip forks
+            if repo.fork {
+                debug!("Skipping fork: {}/{}", org, repo.name);
+                return false;
+            }
+
+            // Check if tracked
+            !tracked_repos.contains(&(org.clone(), repo.name.clone()))
+        })
+        .map(|(org, repo)| UntrackedRepo {
+            org: org.clone(),
+            name: repo.name.clone(),
+        })
+        .collect()
+}
diff --git a/src/main.rs b/src/main.rs
index 3e09d8b69..b7bd2909f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -115,6 +115,8 @@ enum CiOpts {
     GenerateCodeowners,
     /// Check if the .github/CODEOWNERS file is up-to-date
     CheckCodeowners,
+    /// Check for untracked repositories in GitHub organizations
+    CheckUntrackedRepos,
 }
 
 #[derive(clap::Parser, Debug)]
@@ -535,6 +537,7 @@ fn run() -> Result<(), Error> {
         Cli::Ci(opts) => match opts {
             CiOpts::GenerateCodeowners => generate_codeowners_file(data)?,
             CiOpts::CheckCodeowners => check_codeowners(data)?,
+            CiOpts::CheckUntrackedRepos => ci::check_untracked_repos(&data)?,
         },
         Cli::Sync(opts) => {
             if let Err(err) = perform_sync(opts, data) {