diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index cc406b82ae..884ea6f6bc 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -682,6 +682,9 @@ importers: cron-time-generator: specifier: ^1.3.0 version: 1.3.2 + js-yaml: + specifier: ^4.1.0 + version: 4.1.1 p-queue: specifier: ^8.1.0 version: 8.1.0 @@ -695,6 +698,9 @@ importers: specifier: ^5.6.3 version: 5.6.3 devDependencies: + '@types/js-yaml': + specifier: ^4.0.9 + version: 4.0.9 '@types/node': specifier: ^20.8.2 version: 20.12.7 @@ -4960,6 +4966,9 @@ packages: '@types/is-stream@1.1.0': resolution: {integrity: sha512-jkZatu4QVbR60mpIzjINmtS1ZF4a/FqdTUTBeQDVOQ2PYyidtwFKr0B5G6ERukKwliq+7mIXvxyppwzG5EgRYg==} + '@types/js-yaml@4.0.9': + resolution: {integrity: sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==} + '@types/json-schema@7.0.15': resolution: {integrity: sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==} @@ -10428,8 +10437,8 @@ snapshots: dependencies: '@aws-crypto/sha256-browser': 3.0.0 '@aws-crypto/sha256-js': 3.0.0 - '@aws-sdk/client-sso-oidc': 3.572.0(@aws-sdk/client-sts@3.572.0) - '@aws-sdk/client-sts': 3.572.0 + '@aws-sdk/client-sso-oidc': 3.572.0 + '@aws-sdk/client-sts': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0) '@aws-sdk/core': 3.572.0 '@aws-sdk/credential-provider-node': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0)(@aws-sdk/client-sts@3.572.0) '@aws-sdk/middleware-host-header': 3.567.0 @@ -10623,11 +10632,11 @@ snapshots: transitivePeerDependencies: - aws-crt - '@aws-sdk/client-sso-oidc@3.572.0(@aws-sdk/client-sts@3.572.0)': + '@aws-sdk/client-sso-oidc@3.572.0': dependencies: '@aws-crypto/sha256-browser': 3.0.0 '@aws-crypto/sha256-js': 3.0.0 - '@aws-sdk/client-sts': 3.572.0 + '@aws-sdk/client-sts': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0) '@aws-sdk/core': 3.572.0 '@aws-sdk/credential-provider-node': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0)(@aws-sdk/client-sts@3.572.0) '@aws-sdk/middleware-host-header': 
3.567.0 @@ -10666,7 +10675,6 @@ snapshots: '@smithy/util-utf8': 2.3.0 tslib: 2.6.2 transitivePeerDependencies: - - '@aws-sdk/client-sts' - aws-crt '@aws-sdk/client-sso@3.556.0': @@ -10842,11 +10850,11 @@ snapshots: transitivePeerDependencies: - aws-crt - '@aws-sdk/client-sts@3.572.0': + '@aws-sdk/client-sts@3.572.0(@aws-sdk/client-sso-oidc@3.572.0)': dependencies: '@aws-crypto/sha256-browser': 3.0.0 '@aws-crypto/sha256-js': 3.0.0 - '@aws-sdk/client-sso-oidc': 3.572.0(@aws-sdk/client-sts@3.572.0) + '@aws-sdk/client-sso-oidc': 3.572.0 '@aws-sdk/core': 3.572.0 '@aws-sdk/credential-provider-node': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0)(@aws-sdk/client-sts@3.572.0) '@aws-sdk/middleware-host-header': 3.567.0 @@ -10885,6 +10893,7 @@ snapshots: '@smithy/util-utf8': 2.3.0 tslib: 2.6.2 transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' - aws-crt '@aws-sdk/client-sts@3.985.0': @@ -11050,7 +11059,7 @@ snapshots: '@aws-sdk/credential-provider-ini@3.572.0(@aws-sdk/client-sso-oidc@3.572.0)(@aws-sdk/client-sts@3.572.0)': dependencies: - '@aws-sdk/client-sts': 3.572.0 + '@aws-sdk/client-sts': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0) '@aws-sdk/credential-provider-env': 3.568.0 '@aws-sdk/credential-provider-process': 3.572.0 '@aws-sdk/credential-provider-sso': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0) @@ -11227,7 +11236,7 @@ snapshots: '@aws-sdk/credential-provider-web-identity@3.568.0(@aws-sdk/client-sts@3.572.0)': dependencies: - '@aws-sdk/client-sts': 3.572.0 + '@aws-sdk/client-sts': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0) '@aws-sdk/types': 3.567.0 '@smithy/property-provider': 2.2.0 '@smithy/types': 2.12.0 @@ -11539,7 +11548,7 @@ snapshots: '@aws-sdk/token-providers@3.572.0(@aws-sdk/client-sso-oidc@3.572.0)': dependencies: - '@aws-sdk/client-sso-oidc': 3.572.0(@aws-sdk/client-sts@3.572.0) + '@aws-sdk/client-sso-oidc': 3.572.0 '@aws-sdk/types': 3.567.0 '@smithy/property-provider': 2.2.0 '@smithy/shared-ini-file-loader': 2.4.0 @@ -14496,6 +14505,8 @@ snapshots: 
dependencies: '@types/node': 20.12.7 + '@types/js-yaml@4.0.9': {} + '@types/json-schema@7.0.15': {} '@types/json5@0.0.29': {} diff --git a/services/apps/cron_service/package.json b/services/apps/cron_service/package.json index 0e66d995ba..ff6738ce13 100644 --- a/services/apps/cron_service/package.json +++ b/services/apps/cron_service/package.json @@ -27,6 +27,7 @@ "@crowd/telemetry": "workspace:*", "@crowd/redis": "workspace:*", "@aws-sdk/client-s3": "^3.700.0", + "js-yaml": "^4.1.0", "@dsnp/parquetjs": "^1.7.0", "cron": "^2.1.0", "cron-time-generator": "^1.3.0", @@ -36,6 +37,7 @@ "typescript": "^5.6.3" }, "devDependencies": { + "@types/js-yaml": "^4.0.9", "@types/node": "^20.8.2", "nodemon": "^3.0.1" } diff --git a/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts b/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts new file mode 100644 index 0000000000..502a596333 --- /dev/null +++ b/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts @@ -0,0 +1,215 @@ +import CronTime from 'cron-time-generator' +import yaml from 'js-yaml' + +import { WRITE_DB_CONFIG, getDbConnection } from '@crowd/data-access-layer/src/database' +import { pgpQx } from '@crowd/data-access-layer/src/queryExecutor' +import { getRepositoriesByUrl } from '@crowd/data-access-layer/src/repositories' +import { + createRepositoryGroup, + listRepositoryGroups, + updateRepositoryGroup, +} from '@crowd/data-access-layer/src/repositoryGroups' + +import { IJobDefinition } from '../types' + +// --------------------------------------------------------------------------- +// Config — one entry per governance YAML source we want to sync. +// Adding a second platform in the future is as simple as appending an entry. 
// ---------------------------------------------------------------------------
const GOVERNANCE_SOURCES = [
  {
    // OpenStack governance YAML published by the TC
    yamlUrl: 'https://opendev.org/openstack/governance/raw/branch/master/reference/projects.yaml',
    // Repos in the YAML are listed as "<org>/<repo>".
    // We convert them to full URLs using this prefix.
    repoUrlBase: 'https://review.opendev.org/',
    // Slug of the insightsProject that owns these repository groups.
    insightsProjectSlug: 'OpenStack',
  },
]

// Upper bound for downloading a governance YAML file. Without it a stalled
// HTTP connection would block the job until the job-level timeout fires.
const FETCH_TIMEOUT_MS = 60_000

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

// Shape of the governance YAML as consumed below: a mapping of project name to
// an object whose optional `deliverables` mapping carries optional `repos`
// lists. Entries are typed as nullable because a YAML key with no value is
// loaded as `null`.
type GovernanceYaml = Record<
  string,
  {
    deliverables?: Record<string, { repos?: string[] } | null> | null
  } | null
>

// One top-level YAML entry flattened to its name and de-duplicated repo paths.
interface ParsedProject {
  project: string
  repos: string[]
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Downloads a governance YAML file and flattens it into one entry per
 * top-level key, each with the repos of all of its deliverables.
 *
 * @param yamlUrl - URL of the governance YAML document.
 * @returns One `ParsedProject` per top-level YAML key; `repos` is
 *   de-duplicated and may be empty.
 * @throws Error when the download fails, times out, returns a non-2xx status,
 *   or when the document does not parse to a mapping.
 */
async function fetchProjects(yamlUrl: string): Promise<ParsedProject[]> {
  let text: string

  try {
    const response = await fetch(yamlUrl, { signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) })

    if (!response.ok) {
      throw new Error(`HTTP ${response.status} ${response.statusText}`)
    }
    text = await response.text()
  } catch (err) {
    throw new Error(`Failed to fetch governance YAML from ${yamlUrl}: ${(err as Error).message}`)
  }

  let data: GovernanceYaml
  try {
    // NOTE(review): js-yaml v4 `load` uses the safe default schema — confirm
    // if the dependency is ever downgraded, since this parses remote content.
    const parsed = yaml.load(text)
    // Arrays are objects too, so reject them explicitly: a top-level sequence
    // would otherwise be treated as projects named "0", "1", ...
    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new Error('Parsed YAML is not an object — file format may have changed')
    }
    data = parsed as GovernanceYaml
  } catch (err) {
    throw new Error(`Failed to parse governance YAML from ${yamlUrl}: ${(err as Error).message}`)
  }

  return Object.entries(data).map(([project, info]) => {
    const repos = Object.values(info?.deliverables ?? {})
      // A deliverable may be `null` (key without value) — tolerate it.
      .flatMap((d) => d?.repos ?? [])
      // Drop anything that is not a usable repo path.
      .filter((r): r is string => typeof r === 'string' && r.length > 0)

    return { project, repos: Array.from(new Set(repos)) }
  })
}

/**
 * Converts a display name into a slug: lower-cased, every run of characters
 * outside [a-z0-9] replaced by a single '-', and one leading/trailing '-'
 * removed. Returns an empty string when the name has no [a-z0-9] characters.
 */
function toSlug(name: string): string {
  return name
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '')
}

// ---------------------------------------------------------------------------
// Job definition
// ---------------------------------------------------------------------------

/**
 * Weekly job that mirrors each project of every configured governance YAML
 * into a repository group of the matching insights project. Only repo URLs
 * returned by `getRepositoriesByUrl` are attached; projects with no such repos
 * are skipped. Existing groups are matched by slug and updated, others are
 * created.
 */
const job: IJobDefinition = {
  name: 'openstack-repository-groups-sync',
  // Run once a week
  cronTime: CronTime.everyWeek(),
  timeout: 30 * 60, // 30 minutes

  process: async (ctx) => {
    ctx.log.info('Starting OpenStack repository groups sync...')

    const dbConnection = await getDbConnection(WRITE_DB_CONFIG(), 3, 0)
    const qx = pgpQx(dbConnection)

    for (const source of GOVERNANCE_SOURCES) {
      ctx.log.debug(`Processing source: ${source.yamlUrl}`)

      // ------------------------------------------------------------------
      // 1. Resolve the insights project
      // ------------------------------------------------------------------
      const insightsProject = await qx.selectOneOrNone(
        `SELECT id FROM "insightsProjects" WHERE slug = $(slug) AND "deletedAt" IS NULL`,
        { slug: source.insightsProjectSlug },
      )

      if (!insightsProject) {
        ctx.log.warn(
          `Insights project with slug '${source.insightsProjectSlug}' not found — skipping source.`,
        )
        continue
      }

      const insightsProjectId: string = insightsProject.id
      ctx.log.debug(`Resolved insights project: ${insightsProjectId}`)

      // ------------------------------------------------------------------
      // 2. Fetch + parse the governance YAML
      // ------------------------------------------------------------------
      ctx.log.debug(`Fetching governance YAML...`)
      let projects: ParsedProject[]

      try {
        projects = await fetchProjects(source.yamlUrl)
      } catch (err) {
        ctx.log.error({ err }, `Could not load governance YAML — skipping source`)
        continue
      }

      ctx.log.info(`Parsed ${projects.length} projects from YAML`)

      // ------------------------------------------------------------------
      // 3. Load existing repository groups so we can upsert
      // ------------------------------------------------------------------
      const existingGroups = await listRepositoryGroups(qx, { insightsProjectId })
      const existingBySlug = new Map(existingGroups.map((g) => [g.slug, g]))

      // Slugs already written during this run. Two YAML project names can
      // collapse to the same slug; without this guard the second one would
      // overwrite the first group or attempt a duplicate create.
      const handledSlugs = new Set<string>()

      let created = 0
      let updated = 0
      let skipped = 0

      // ------------------------------------------------------------------
      // 4. Bulk-fetch all repo URLs that exist in the DB (single round-trip)
      // ------------------------------------------------------------------
      const allCandidateUrls = Array.from(
        new Set(projects.flatMap(({ repos }) => repos.map((r) => `${source.repoUrlBase}${r}`))),
      )
      // Skip the query entirely when there is nothing to look up.
      const foundRepos =
        allCandidateUrls.length > 0 ? await getRepositoriesByUrl(qx, allCandidateUrls) : []
      const foundUrlSet = new Set(foundRepos.map((r) => r.url))

      // ------------------------------------------------------------------
      // 5. Upsert one repository group per YAML project
      // ------------------------------------------------------------------
      for (const { project, repos } of projects) {
        if (repos.length === 0) {
          ctx.log.debug(`'${project}' has no repos in YAML — skipping`)
          skipped++
          continue
        }

        const slug = toSlug(project)
        const candidateUrls = Array.from(new Set(repos.map((r) => `${source.repoUrlBase}${r}`)))

        // Split candidates into known / unknown URLs in a single pass.
        const foundUrls: string[] = []
        const missing: string[] = []
        for (const url of candidateUrls) {
          if (foundUrlSet.has(url)) {
            foundUrls.push(url)
          } else {
            missing.push(url)
          }
        }

        if (foundUrls.length === 0) {
          ctx.log.debug(
            `'${project}': none of the ${candidateUrls.length} repo URLs exist in the repositories table — skipping`,
          )
          skipped++
          continue
        }

        if (slug.length === 0) {
          ctx.log.warn(`'${project}': name does not produce a usable slug — skipping`)
          skipped++
          continue
        }

        if (handledSlugs.has(slug)) {
          ctx.log.warn(
            `'${project}': slug '${slug}' was already synced for another project in this run — skipping`,
          )
          skipped++
          continue
        }

        if (missing.length > 0) {
          ctx.log.warn(
            `'${project}': ${missing.length}/${candidateUrls.length} repos not found in DB` +
              ` (first 5: ${missing.slice(0, 5).join(', ')}${missing.length > 5 ? '...' : ''})`,
          )
        }

        const existingGroup = existingBySlug.get(slug)

        if (existingGroup) {
          await updateRepositoryGroup(qx, existingGroup.id, {
            name: project,
            slug,
            repositories: foundUrls,
          })
          ctx.log.info(`Updated '${project}' — ${foundUrls.length} repos`)
          updated++
        } else {
          await createRepositoryGroup(qx, {
            name: project,
            slug,
            insightsProjectId,
            repositories: foundUrls,
          })
          ctx.log.info(`Created '${project}' — ${foundUrls.length} repos`)
          created++
        }

        handledSlugs.add(slug)
      }

      ctx.log.debug(`Source done — created: ${created}, updated: ${updated}, skipped: ${skipped}`)
    }

    ctx.log.info('OpenStack repository groups sync complete')
  },
}

export default job