From 039ef3d1dc16301f5c379d8a61fe0cb97dc7f8a8 Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Mon, 6 Apr 2026 18:40:11 +0100 Subject: [PATCH 1/4] chore: automatic repository groups parsing for openstack Signed-off-by: Joana Maia --- pnpm-lock.yaml | 31 ++- services/apps/cron_service/package.json | 2 + .../src/jobs/openstackRepositoryGroups.job.ts | 185 ++++++++++++++++++ 3 files changed, 208 insertions(+), 10 deletions(-) create mode 100644 services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index cc406b82ae..884ea6f6bc 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -682,6 +682,9 @@ importers: cron-time-generator: specifier: ^1.3.0 version: 1.3.2 + js-yaml: + specifier: ^4.1.0 + version: 4.1.1 p-queue: specifier: ^8.1.0 version: 8.1.0 @@ -695,6 +698,9 @@ importers: specifier: ^5.6.3 version: 5.6.3 devDependencies: + '@types/js-yaml': + specifier: ^4.0.9 + version: 4.0.9 '@types/node': specifier: ^20.8.2 version: 20.12.7 @@ -4960,6 +4966,9 @@ packages: '@types/is-stream@1.1.0': resolution: {integrity: sha512-jkZatu4QVbR60mpIzjINmtS1ZF4a/FqdTUTBeQDVOQ2PYyidtwFKr0B5G6ERukKwliq+7mIXvxyppwzG5EgRYg==} + '@types/js-yaml@4.0.9': + resolution: {integrity: sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==} + '@types/json-schema@7.0.15': resolution: {integrity: sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==} @@ -10428,8 +10437,8 @@ snapshots: dependencies: '@aws-crypto/sha256-browser': 3.0.0 '@aws-crypto/sha256-js': 3.0.0 - '@aws-sdk/client-sso-oidc': 3.572.0(@aws-sdk/client-sts@3.572.0) - '@aws-sdk/client-sts': 3.572.0 + '@aws-sdk/client-sso-oidc': 3.572.0 + '@aws-sdk/client-sts': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0) '@aws-sdk/core': 3.572.0 '@aws-sdk/credential-provider-node': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0)(@aws-sdk/client-sts@3.572.0) '@aws-sdk/middleware-host-header': 3.567.0 @@ -10623,11 
+10632,11 @@ snapshots: transitivePeerDependencies: - aws-crt - '@aws-sdk/client-sso-oidc@3.572.0(@aws-sdk/client-sts@3.572.0)': + '@aws-sdk/client-sso-oidc@3.572.0': dependencies: '@aws-crypto/sha256-browser': 3.0.0 '@aws-crypto/sha256-js': 3.0.0 - '@aws-sdk/client-sts': 3.572.0 + '@aws-sdk/client-sts': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0) '@aws-sdk/core': 3.572.0 '@aws-sdk/credential-provider-node': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0)(@aws-sdk/client-sts@3.572.0) '@aws-sdk/middleware-host-header': 3.567.0 @@ -10666,7 +10675,6 @@ snapshots: '@smithy/util-utf8': 2.3.0 tslib: 2.6.2 transitivePeerDependencies: - - '@aws-sdk/client-sts' - aws-crt '@aws-sdk/client-sso@3.556.0': @@ -10842,11 +10850,11 @@ snapshots: transitivePeerDependencies: - aws-crt - '@aws-sdk/client-sts@3.572.0': + '@aws-sdk/client-sts@3.572.0(@aws-sdk/client-sso-oidc@3.572.0)': dependencies: '@aws-crypto/sha256-browser': 3.0.0 '@aws-crypto/sha256-js': 3.0.0 - '@aws-sdk/client-sso-oidc': 3.572.0(@aws-sdk/client-sts@3.572.0) + '@aws-sdk/client-sso-oidc': 3.572.0 '@aws-sdk/core': 3.572.0 '@aws-sdk/credential-provider-node': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0)(@aws-sdk/client-sts@3.572.0) '@aws-sdk/middleware-host-header': 3.567.0 @@ -10885,6 +10893,7 @@ snapshots: '@smithy/util-utf8': 2.3.0 tslib: 2.6.2 transitivePeerDependencies: + - '@aws-sdk/client-sso-oidc' - aws-crt '@aws-sdk/client-sts@3.985.0': @@ -11050,7 +11059,7 @@ snapshots: '@aws-sdk/credential-provider-ini@3.572.0(@aws-sdk/client-sso-oidc@3.572.0)(@aws-sdk/client-sts@3.572.0)': dependencies: - '@aws-sdk/client-sts': 3.572.0 + '@aws-sdk/client-sts': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0) '@aws-sdk/credential-provider-env': 3.568.0 '@aws-sdk/credential-provider-process': 3.572.0 '@aws-sdk/credential-provider-sso': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0) @@ -11227,7 +11236,7 @@ snapshots: '@aws-sdk/credential-provider-web-identity@3.568.0(@aws-sdk/client-sts@3.572.0)': dependencies: - '@aws-sdk/client-sts': 3.572.0 + 
'@aws-sdk/client-sts': 3.572.0(@aws-sdk/client-sso-oidc@3.572.0) '@aws-sdk/types': 3.567.0 '@smithy/property-provider': 2.2.0 '@smithy/types': 2.12.0 @@ -11539,7 +11548,7 @@ snapshots: '@aws-sdk/token-providers@3.572.0(@aws-sdk/client-sso-oidc@3.572.0)': dependencies: - '@aws-sdk/client-sso-oidc': 3.572.0(@aws-sdk/client-sts@3.572.0) + '@aws-sdk/client-sso-oidc': 3.572.0 '@aws-sdk/types': 3.567.0 '@smithy/property-provider': 2.2.0 '@smithy/shared-ini-file-loader': 2.4.0 @@ -14496,6 +14505,8 @@ snapshots: dependencies: '@types/node': 20.12.7 + '@types/js-yaml@4.0.9': {} + '@types/json-schema@7.0.15': {} '@types/json5@0.0.29': {} diff --git a/services/apps/cron_service/package.json b/services/apps/cron_service/package.json index 0e66d995ba..ff6738ce13 100644 --- a/services/apps/cron_service/package.json +++ b/services/apps/cron_service/package.json @@ -27,6 +27,7 @@ "@crowd/telemetry": "workspace:*", "@crowd/redis": "workspace:*", "@aws-sdk/client-s3": "^3.700.0", + "js-yaml": "^4.1.0", "@dsnp/parquetjs": "^1.7.0", "cron": "^2.1.0", "cron-time-generator": "^1.3.0", @@ -36,6 +37,7 @@ "typescript": "^5.6.3" }, "devDependencies": { + "@types/js-yaml": "^4.0.9", "@types/node": "^20.8.2", "nodemon": "^3.0.1" } diff --git a/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts b/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts new file mode 100644 index 0000000000..38c1511e7d --- /dev/null +++ b/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts @@ -0,0 +1,185 @@ +import CronTime from 'cron-time-generator' +import yaml from 'js-yaml' + +import { WRITE_DB_CONFIG, getDbConnection } from '@crowd/data-access-layer/src/database' +import { pgpQx } from '@crowd/data-access-layer/src/queryExecutor' +import { getRepositoriesByUrl } from '@crowd/data-access-layer/src/repositories' +import { + createRepositoryGroup, + listRepositoryGroups, + updateRepositoryGroup, +} from '@crowd/data-access-layer/src/repositoryGroups' + +import { 
IJobDefinition } from '../types' + +// --------------------------------------------------------------------------- +// Config — one entry per governance YAML source we want to sync. +// Adding a second platform in the future is as simple as appending an entry. +// --------------------------------------------------------------------------- +const GOVERNANCE_SOURCES = [ + { + // OpenStack governance YAML published by the TC + yamlUrl: 'https://opendev.org/openstack/governance/raw/branch/master/reference/projects.yaml', + // Repos in the YAML are listed as "namespace/repo" (e.g. "openstack/nova"). + // We convert them to full URLs using this prefix. + repoUrlBase: 'https://review.opendev.org/', + // Slug of the insightsProject that owns these repository groups. + insightsProjectSlug: 'OpenStack', + }, +] + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- +type GovernanceYaml = Record< + string, + { + deliverables?: Record + } +> + +interface ParsedProject { + project: string + repos: string[] +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- +async function fetchProjects(yamlUrl: string): Promise { + const response = await fetch(yamlUrl) + if (!response.ok) { + throw new Error(`Failed to fetch governance YAML (${response.status}): ${yamlUrl}`) + } + const text = await response.text() + const data = yaml.load(text) as GovernanceYaml + + return Object.entries(data).map(([project, info]) => ({ + project, + repos: Object.values(info.deliverables ?? {}).flatMap((d) => d.repos ?? 
[]), + })) +} + +function toSlug(name: string): string { + return name + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, '') +} + +// --------------------------------------------------------------------------- +// Job definition +// --------------------------------------------------------------------------- +const job: IJobDefinition = { + name: 'openstack-repository-groups-sync', + // Run once a week + cronTime: CronTime.everyWeek(), + timeout: 30 * 60, // 30 minutes + + process: async (ctx) => { + ctx.log.debug('Starting OpenStack repository groups sync...') + + const dbConnection = await getDbConnection(WRITE_DB_CONFIG(), 3, 0) + const qx = pgpQx(dbConnection) + + for (const source of GOVERNANCE_SOURCES) { + ctx.log.debug(`Processing source: ${source.yamlUrl}`) + + // ------------------------------------------------------------------ + // 1. Resolve the insights project + // ------------------------------------------------------------------ + const insightsProject = await qx.selectOneOrNone( + `SELECT id FROM "insightsProjects" WHERE slug = $(slug) AND "deletedAt" IS NULL`, + { slug: source.insightsProjectSlug }, + ) + + if (!insightsProject) { + ctx.log.warn( + `Insights project with slug '${source.insightsProjectSlug}' not found — skipping source.`, + ) + continue + } + + const insightsProjectId: string = insightsProject.id + ctx.log.debug(`Resolved insights project: ${insightsProjectId}`) + + // ------------------------------------------------------------------ + // 2. Fetch + parse the governance YAML + // ------------------------------------------------------------------ + ctx.log.debug(`Fetching governance YAML...`) + const projects = await fetchProjects(source.yamlUrl) + ctx.log.debug(`Parsed ${projects.length} projects from YAML`) + + // ------------------------------------------------------------------ + // 3. 
Load existing repository groups so we can upsert + // ------------------------------------------------------------------ + const existingGroups = await listRepositoryGroups(qx, { insightsProjectId }) + const existingBySlug = new Map(existingGroups.map((g) => [g.slug, g])) + + let created = 0 + let updated = 0 + let skipped = 0 + + // ------------------------------------------------------------------ + // 4. Upsert one repository group per YAML project + // ------------------------------------------------------------------ + for (const { project, repos } of projects) { + if (repos.length === 0) { + ctx.log.debug(`'${project}' has no repos in YAML — skipping`) + skipped++ + continue + } + + const slug = toSlug(project) + const candidateUrls = repos.map((r) => `${source.repoUrlBase}${r}`) + + // Only include repos that actually exist in public.repositories + const foundRepos = await getRepositoriesByUrl(qx, candidateUrls) + const foundUrls = foundRepos.map((r) => r.url) + + if (foundUrls.length === 0) { + ctx.log.debug( + `'${project}': none of the ${candidateUrls.length} repo URLs exist in the repositories table — skipping`, + ) + skipped++ + continue + } + + const missing = candidateUrls.filter((u) => !foundUrls.includes(u)) + if (missing.length > 0) { + ctx.log.warn( + `'${project}': ${missing.length}/${candidateUrls.length} repos not found in DB` + + ` (first 5: ${missing.slice(0, 5).join(', ')}${missing.length > 5 ? '...' 
: ''})`, + ) + } + + const existingGroup = existingBySlug.get(slug) + + if (existingGroup) { + await updateRepositoryGroup(qx, existingGroup.id, { + name: project, + slug, + repositories: foundUrls, + }) + ctx.log.debug(`Updated '${project}' — ${foundUrls.length} repos`) + updated++ + } else { + await createRepositoryGroup(qx, { + name: project, + slug, + insightsProjectId, + repositories: foundUrls, + }) + ctx.log.debug(`Created '${project}' — ${foundUrls.length} repos`) + created++ + } + } + + ctx.log.debug(`Source done — created: ${created}, updated: ${updated}, skipped: ${skipped}`) + } + + ctx.log.debug('OpenStack repository groups sync complete') + }, +} + +export default job From b739c710debccab849608763648250fadbe32f1e Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Mon, 6 Apr 2026 18:48:24 +0100 Subject: [PATCH 2/4] chore: error handling Signed-off-by: Joana Maia --- .../src/jobs/openstackRepositoryGroups.job.ts | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts b/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts index 38c1511e7d..b757d7b948 100644 --- a/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts +++ b/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts @@ -47,12 +47,28 @@ interface ParsedProject { // Helpers // --------------------------------------------------------------------------- async function fetchProjects(yamlUrl: string): Promise { - const response = await fetch(yamlUrl) - if (!response.ok) { - throw new Error(`Failed to fetch governance YAML (${response.status}): ${yamlUrl}`) + let text: string + + try { + const response = await fetch(yamlUrl) + + if (!response.ok) { + throw new Error(`HTTP ${response.status} ${response.statusText}`) + } + text = await response.text() + } catch (err) { + throw new Error(`Failed to fetch governance YAML from ${yamlUrl}: ${(err as Error).message}`) 
+ } + + let data: GovernanceYaml + try { + data = yaml.load(text) as GovernanceYaml + if (!data || typeof data !== 'object') { + throw new Error('Parsed YAML is not an object — file format may have changed') + } + } catch (err) { + throw new Error(`Failed to parse governance YAML from ${yamlUrl}: ${(err as Error).message}`) } - const text = await response.text() - const data = yaml.load(text) as GovernanceYaml return Object.entries(data).map(([project, info]) => ({ project, @@ -107,7 +123,15 @@ const job: IJobDefinition = { // 2. Fetch + parse the governance YAML // ------------------------------------------------------------------ ctx.log.debug(`Fetching governance YAML...`) - const projects = await fetchProjects(source.yamlUrl) + let projects: ParsedProject[] + + try { + projects = await fetchProjects(source.yamlUrl) + } catch (err) { + ctx.log.error({ err }, `Could not load governance YAML — skipping source`) + continue + } + ctx.log.debug(`Parsed ${projects.length} projects from YAML`) // ------------------------------------------------------------------ From f10c0e52809c4e16fa42194e416194f9e5998db4 Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Tue, 7 Apr 2026 10:50:52 +0100 Subject: [PATCH 3/4] chore: address pr comments Signed-off-by: Joana Maia --- .../src/jobs/openstackRepositoryGroups.job.ts | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts b/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts index b757d7b948..9e0cf3ed24 100644 --- a/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts +++ b/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts @@ -72,7 +72,7 @@ async function fetchProjects(yamlUrl: string): Promise { return Object.entries(data).map(([project, info]) => ({ project, - repos: Object.values(info.deliverables ?? {}).flatMap((d) => d.repos ?? 
[]), + repos: Object.values(info?.deliverables ?? {}).flatMap((d) => d.repos ?? []), })) } @@ -145,7 +145,16 @@ const job: IJobDefinition = { let skipped = 0 // ------------------------------------------------------------------ - // 4. Upsert one repository group per YAML project + // 4. Bulk-fetch all repo URLs that exist in the DB (single round-trip) + // ------------------------------------------------------------------ + const allCandidateUrls = projects.flatMap(({ repos }) => + repos.map((r) => `${source.repoUrlBase}${r}`), + ) + const foundRepos = await getRepositoriesByUrl(qx, allCandidateUrls) + const foundUrlSet = new Set(foundRepos.map((r) => r.url)) + + // ------------------------------------------------------------------ + // 5. Upsert one repository group per YAML project // ------------------------------------------------------------------ for (const { project, repos } of projects) { if (repos.length === 0) { @@ -156,10 +165,7 @@ const job: IJobDefinition = { const slug = toSlug(project) const candidateUrls = repos.map((r) => `${source.repoUrlBase}${r}`) - - // Only include repos that actually exist in public.repositories - const foundRepos = await getRepositoriesByUrl(qx, candidateUrls) - const foundUrls = foundRepos.map((r) => r.url) + const foundUrls = candidateUrls.filter((u) => foundUrlSet.has(u)) if (foundUrls.length === 0) { ctx.log.debug( @@ -169,7 +175,7 @@ const job: IJobDefinition = { continue } - const missing = candidateUrls.filter((u) => !foundUrls.includes(u)) + const missing = candidateUrls.filter((u) => !foundUrlSet.has(u)) if (missing.length > 0) { ctx.log.warn( `'${project}': ${missing.length}/${candidateUrls.length} repos not found in DB` + From f272d0b82ea5d8b5201c452688b273bc36197863 Mon Sep 17 00:00:00 2001 From: Joana Maia Date: Wed, 8 Apr 2026 11:13:39 +0100 Subject: [PATCH 4/4] chore: update some debug logs to info Signed-off-by: Joana Maia --- .../src/jobs/openstackRepositoryGroups.job.ts | 10 +++++----- 1 file changed, 5 
insertions(+), 5 deletions(-) diff --git a/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts b/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts index 9e0cf3ed24..502a596333 100644 --- a/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts +++ b/services/apps/cron_service/src/jobs/openstackRepositoryGroups.job.ts @@ -93,7 +93,7 @@ const job: IJobDefinition = { timeout: 30 * 60, // 30 minutes process: async (ctx) => { - ctx.log.debug('Starting OpenStack repository groups sync...') + ctx.log.info('Starting OpenStack repository groups sync...') const dbConnection = await getDbConnection(WRITE_DB_CONFIG(), 3, 0) const qx = pgpQx(dbConnection) @@ -132,7 +132,7 @@ const job: IJobDefinition = { continue } - ctx.log.debug(`Parsed ${projects.length} projects from YAML`) + ctx.log.info(`Parsed ${projects.length} projects from YAML`) // ------------------------------------------------------------------ // 3. Load existing repository groups so we can upsert @@ -191,7 +191,7 @@ const job: IJobDefinition = { slug, repositories: foundUrls, }) - ctx.log.debug(`Updated '${project}' — ${foundUrls.length} repos`) + ctx.log.info(`Updated '${project}' — ${foundUrls.length} repos`) updated++ } else { await createRepositoryGroup(qx, { @@ -200,7 +200,7 @@ const job: IJobDefinition = { insightsProjectId, repositories: foundUrls, }) - ctx.log.debug(`Created '${project}' — ${foundUrls.length} repos`) + ctx.log.info(`Created '${project}' — ${foundUrls.length} repos`) created++ } } @@ -208,7 +208,7 @@ const job: IJobDefinition = { ctx.log.debug(`Source done — created: ${created}, updated: ${updated}, skipped: ${skipped}`) } - ctx.log.debug('OpenStack repository groups sync complete') + ctx.log.info('OpenStack repository groups sync complete') }, }