Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Logical-replication prerequisites for "collectionsRepositories":
--   * REPLICA IDENTITY FULL makes UPDATE/DELETE changes carry the complete old row.
--   * The sequin role is given read access to the table.
ALTER TABLE "collectionsRepositories"
    REPLICA IDENTITY FULL;

GRANT SELECT
    ON TABLE "collectionsRepositories"
    TO sequin;
Comment thread
epipav marked this conversation as resolved.

-- Add "collectionsRepositories" to the sequin_pub publication.
-- The ALTER is skipped when pg_publication_tables already lists the table,
-- so re-running the migration does not fail on a duplicate membership.
DO $$
BEGIN
    -- PERFORM discards the result but sets FOUND when at least one row matched.
    PERFORM 1
    FROM pg_publication_tables
    WHERE pubname = 'sequin_pub'
      AND tablename = 'collectionsRepositories';

    IF NOT FOUND THEN
        ALTER PUBLICATION sequin_pub ADD TABLE "collectionsRepositories";
    END IF;
END $$;

-- Composite index on ("updatedAt", id); IF NOT EXISTS keeps the migration re-runnable.
CREATE INDEX IF NOT EXISTS "ix_collectionsRepositories_updatedAt_id"
    ON "collectionsRepositories" USING btree ("updatedAt", id);
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
DESCRIPTION >
    - Pre-computed at-a-glance metrics, keyed by `collectionId`.
    - Written by the `agentic_ai_momentum_glance_copy` copy pipe (replace mode) and read by the `agentic_ai_momentum_glance` pipe.
    - `mostActiveProjects` holds a JSON array serialized into a String.
    - The `median*Seconds` columns are medians of durations expressed in seconds; the `30d` variants cover the last 30 days.

SCHEMA >
    `collectionId` String,
    `totalCount` UInt64,
    `totalSoftwareValue` UInt64,
    `totalContributorCount` UInt64,
    `totalContributorCount30d` UInt64,
    `commitsCount30d` UInt64,
    `mostActiveProjects` String,
    `medianIssueCloseTimeSeconds` Float64,
    `medianIssueCloseTimeSeconds30d` Float64,
    `medianPrResolutionTimeSeconds` Float64,
    `medianPrResolutionTimeSeconds30d` Float64,
    `projectsWithGithubPrActivity` UInt64,
    `projectsWithGithubIssueActivity` UInt64

ENGINE "MergeTree"
ENGINE_SORTING_KEY "collectionId"
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
DESCRIPTION >
    - Per-project metrics, sorted by `projectId`.
    - Read by the `agentic_ai_projects_list` pipe, which exposes every column under a snake_case name.
    - Nullable columns allow a metric to be absent for a project.
    - NOTE(review): the pipe that populates this datasource is not visible here; the `*30d` columns presumably hold the trailing-30-day counterpart of the base column - confirm against the writer.

SCHEMA >
    `projectId` String,
    `name` String,
    `slug` String,
    `githubRepoLink` String,
    `stars` UInt64,
    `stars30d` UInt64,
    `forks` UInt64,
    `forks30d` UInt64,
    `downloads` Nullable(UInt64),
    `downloads30d` Nullable(Int64),
    `dockerPulls` Nullable(UInt64),
    `dockerPulls30d` Nullable(Int64),
    `dependentRepos` Nullable(UInt64),
    `dependentPackages` Nullable(UInt64),
    `commits` UInt64,
    `commits30d` UInt64,
    `contributors` UInt64,
    `contributors30d` UInt64,
    `newContributors30d` UInt64,
    `mergeRate` Nullable(Float64),
    `mergeRate30d` Nullable(Float64),
    `prResolveTimeSeconds` Nullable(Float64),
    `prResolveTimeSeconds30d` Nullable(Float64),
    `issueCloseTimeSeconds` Nullable(Float64),
    `issueCloseTimeSeconds30d` Nullable(Float64),
    `issueResponseTimeSeconds` Nullable(Float64),
    `issueResponseTimeSeconds30d` Nullable(Float64),
    `noResponseIssues` Nullable(UInt64),
    `noResponseIssues30d` Nullable(UInt64),
    `vulnerabilities` Nullable(UInt64),
    `vulnerabilities30d` Nullable(UInt64),
    `cocomoValue` UInt64

ENGINE "MergeTree"
ENGINE_SORTING_KEY "projectId"
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
DESCRIPTION >
    - `collectionsRepositories` is the junction table that links collections to repositories.
    - `id` is the primary key of a collection-repository link.
    - `collectionId` references the collection side of the link.
    - `repoId` references the repository side of the link.
    - `createdAt` and `updatedAt` are the standard record-lifecycle timestamps; `updatedAt` is also the ReplacingMergeTree version column.
    - `deletedAt` implements soft deletes: null means the link is active, a timestamp means it was logically deleted.

SCHEMA >
    `id` String `json:$.record.id`,
    `collectionId` String `json:$.record.collectionId`,
    `repoId` String `json:$.record.repoId`,
    `createdAt` DateTime64(3) `json:$.record.createdAt`,
    `updatedAt` DateTime64(3) `json:$.record.updatedAt`,
    `deletedAt` Nullable(DateTime64(3)) `json:$.record.deletedAt`

ENGINE "ReplacingMergeTree"
ENGINE_PARTITION_KEY "toYear(createdAt)"
ENGINE_SORTING_KEY "repoId, collectionId"
ENGINE_VER "updatedAt"
16 changes: 16 additions & 0 deletions services/libs/tinybird/pipes/agentic_ai_momentum_glance.pipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
NODE at_a_glance_metrics
DESCRIPTION >
    Exposes the pre-computed metrics stored in `agentic_ai_momentum_glance_ds`, renaming each camelCase column to snake_case.
    `collectionId` is not part of the output.

SQL >
    SELECT
        glance.totalCount AS total_count,
        glance.totalSoftwareValue AS total_software_value,
        glance.totalContributorCount AS total_contributor_count,
        glance.totalContributorCount30d AS total_contributor_count_30d,
        glance.commitsCount30d AS commits_count_30d,
        glance.mostActiveProjects AS most_active_projects,
        glance.medianIssueCloseTimeSeconds AS median_issue_close_time_seconds,
        glance.medianIssueCloseTimeSeconds30d AS median_issue_close_time_seconds_30d,
        glance.medianPrResolutionTimeSeconds AS median_pr_resolution_time_seconds,
        glance.medianPrResolutionTimeSeconds30d AS median_pr_resolution_time_seconds_30d,
        glance.projectsWithGithubPrActivity AS projects_with_github_pr_activity,
        glance.projectsWithGithubIssueActivity AS projects_with_github_issue_activity
    FROM agentic_ai_momentum_glance_ds AS glance
204 changes: 204 additions & 0 deletions services/libs/tinybird/pipes/agentic_ai_momentum_glance_copy.pipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
DESCRIPTION >
Pre-computes at-a-glance metrics for the Agentic AI collection.
Filters by specific repositories in the collection via collectionsRepositories.
Runs daily and stores the result in agentic_ai_momentum_glance_ds.

NODE ai_repos
DESCRIPTION >
    Repositories linked to the collection: one row per link with the repository `url`, `segmentId` and `insightsProjectId`.
    Soft-deleted links, soft-deleted repositories and disabled repositories are excluded.

SQL >
    SELECT
        repo.url AS url,
        repo.segmentId AS segmentId,
        repo.insightsProjectId AS insightsProjectId
    FROM collectionsRepositories AS link FINAL
    INNER JOIN repositories AS repo FINAL ON link.repoId = repo.id
    WHERE
        link.collectionId = '3cf46a5d-abd0-440e-95f9-4ae9458903a8'
        AND link.deletedAt IS NULL
        AND repo.deletedAt IS NULL
        AND repo.enabled = true

NODE ai_projects
DESCRIPTION >
    Insights projects referenced by at least one repository in `ai_repos`.

SQL >
    SELECT
        id,
        name,
        slug,
        softwareValue
    FROM insights_projects_populated_ds
    WHERE
        id IN (
            SELECT DISTINCT insightsProjectId
            FROM ai_repos
        )

NODE total_contributor_count
DESCRIPTION >
    All-time number of distinct members with a code-contribution or collaboration activity
    in the collection's repositories. Produces a single row.

SQL >
    SELECT count(DISTINCT memberId) AS totalContributorCount
    FROM activityRelations_deduplicated_cleaned_bucket_union
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        -- keep only (type, platform) pairs flagged as contribution activity
        AND (type, platform) IN (
            SELECT activityType, platform
            FROM activityTypes
            WHERE isCodeContribution = 1 OR isCollaboration = 1
        )

NODE total_contributor_count_30d
DESCRIPTION >
    Number of distinct members with a code-contribution or collaboration activity
    in the collection's repositories during the last 30 days. Produces a single row.

SQL >
    SELECT count(DISTINCT memberId) AS totalContributorCount30d
    FROM activityRelations_deduplicated_cleaned_bucket_union
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND timestamp >= now() - INTERVAL 30 DAY
        -- keep only (type, platform) pairs flagged as contribution activity
        AND (type, platform) IN (
            SELECT activityType, platform
            FROM activityTypes
            WHERE isCodeContribution = 1 OR isCollaboration = 1
        )

NODE commits_count_30d
DESCRIPTION >
    Number of `authored-commit` and `committed-commit` activities recorded
    in the collection's repositories during the last 30 days. Produces a single row.

SQL >
    SELECT count() AS commitsCount30d
    FROM activityRelations_deduplicated_cleaned_bucket_union
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND type IN ('authored-commit', 'committed-commit')
        AND timestamp >= now() - INTERVAL 30 DAY

NODE new_contributors_per_project
DESCRIPTION >
    Per segment, the number of members whose earliest contribution activity
    (within the collection's repositories) happened in the last 30 days.

SQL >
    SELECT
        segmentId,
        countIf(firstSeenAt >= now() - INTERVAL 30 DAY) AS newContributors
    FROM
        (
            -- earliest contribution activity of each member within each segment
            SELECT
                segmentId,
                memberId,
                min(timestamp) AS firstSeenAt
            FROM activityRelations_deduplicated_cleaned_bucket_union
            WHERE
                segmentId IN (
                    SELECT DISTINCT segmentId
                    FROM ai_repos
                )
                AND channel IN (
                    SELECT url
                    FROM ai_repos
                )
                AND (type, platform) IN (
                    SELECT activityType, platform
                    FROM activityTypes
                    WHERE isCodeContribution = 1 OR isCollaboration = 1
                )
            GROUP BY segmentId, memberId
        ) AS first_activity_per_member
    GROUP BY segmentId

NODE most_active_projects
DESCRIPTION >
    Top three projects ranked by new contributors, emitted in one String as comma-separated JSON objects
    with the keys `name`, `slug` and `newContributors` (all values are strings). Produces a single row;
    the result is an empty string when there is nothing to rank.
    Segments that do not resolve to a row in `ai_projects` are skipped. With outer joins such rows are filled
    with column defaults and could surface as an entry with an empty name and slug.

SQL >
    SELECT
        coalesce(
            arrayStringConcat(
                arrayMap(
                    x -> toJSONString(map('name', x.1, 'slug', x.2, 'newContributors', toString(x.3))),
                    arraySlice(
                        -- descending by newContributors, then keep the first three
                        arrayReverseSort(x -> x.3, groupArray((p.name, p.slug, ncp.newContributors))),
                        1,
                        3
                    )
                ),
                ','
            ),
            ''
        ) AS mostActiveProjects
    FROM new_contributors_per_project AS ncp
    INNER JOIN
        (
            -- one insights project per segment
            SELECT segmentId, any(insightsProjectId) AS insightsProjectId
            FROM ai_repos
            GROUP BY segmentId
        ) AS ar
        ON ar.segmentId = ncp.segmentId
    INNER JOIN ai_projects AS p ON p.id = ar.insightsProjectId

Comment thread
epipav marked this conversation as resolved.
NODE median_issue_close_time
DESCRIPTION >
    Median `closedInSeconds` over all closed issues of the collection's repositories, ignoring non-positive durations.
    Produces a single row.
    NOTE(review): if `closedInSeconds` is not Nullable, the median of zero rows is NaN rather than NULL and `coalesce` will not replace it - confirm the column type.

SQL >
    SELECT coalesce(quantile(0.5)(closedInSeconds), 0) AS medianIssueCloseTimeSeconds
    FROM issues_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND closedAt IS NOT NULL
        AND closedInSeconds > 0

NODE median_issue_close_time_30d
DESCRIPTION >
    Median `closedInSeconds` over issues of the collection's repositories closed in the last 30 days, ignoring non-positive durations.
    Produces a single row.
    NOTE(review): if `closedInSeconds` is not Nullable, the median of zero rows is NaN rather than NULL and `coalesce` will not replace it - confirm the column type.

SQL >
    SELECT coalesce(quantile(0.5)(closedInSeconds), 0) AS medianIssueCloseTimeSeconds30d
    FROM issues_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND closedAt IS NOT NULL
        AND closedInSeconds > 0
        AND closedAt >= now() - INTERVAL 30 DAY

NODE median_pr_resolution_time
DESCRIPTION >
    Median `resolvedInSeconds` over all resolved pull requests of the collection's repositories, ignoring non-positive durations.
    Produces a single row.
    NOTE(review): if `resolvedInSeconds` is not Nullable, the median of zero rows is NaN rather than NULL and `coalesce` will not replace it - confirm the column type.

SQL >
    SELECT coalesce(quantile(0.5)(resolvedInSeconds), 0) AS medianPrResolutionTimeSeconds
    FROM pull_requests_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND resolvedAt IS NOT NULL
        AND resolvedInSeconds > 0

NODE median_pr_resolution_time_30d
DESCRIPTION >
    Median `resolvedInSeconds` over pull requests of the collection's repositories resolved in the last 30 days, ignoring non-positive durations.
    Produces a single row.
    NOTE(review): if `resolvedInSeconds` is not Nullable, the median of zero rows is NaN rather than NULL and `coalesce` will not replace it - confirm the column type.

SQL >
    SELECT coalesce(quantile(0.5)(resolvedInSeconds), 0) AS medianPrResolutionTimeSeconds30d
    FROM pull_requests_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND resolvedAt IS NOT NULL
        AND resolvedInSeconds > 0
        AND resolvedAt >= now() - INTERVAL 30 DAY

NODE projects_with_github_pr
DESCRIPTION >
    Number of distinct segments of the collection that have at least one GitHub pull request. Produces a single row.

SQL >
    SELECT count(DISTINCT segmentId) AS projectsWithGithubPrActivity
    FROM pull_requests_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND platform = 'github'

NODE projects_with_github_issue
DESCRIPTION >
    Number of distinct segments of the collection that have at least one GitHub issue. Produces a single row.

SQL >
    SELECT count(DISTINCT segmentId) AS projectsWithGithubIssueActivity
    FROM issues_analyzed
    WHERE
        segmentId IN (
            SELECT DISTINCT segmentId
            FROM ai_repos
        )
        AND channel IN (
            SELECT url
            FROM ai_repos
        )
        AND platform = 'github'

NODE at_a_glance_metrics
DESCRIPTION >
    Assembles the output row for the collection: project count and summed software value come from `ai_projects`,
    every other column is the scalar produced by the matching metric node.
    Each cross-joined node is an aggregate without GROUP BY, so the joins do not multiply the `ai_projects` rows.
    The copy replaces the content of `agentic_ai_momentum_glance_ds` on the `0 2 * * *` schedule.

SQL >
    SELECT
        '3cf46a5d-abd0-440e-95f9-4ae9458903a8' AS collectionId,
        count() AS totalCount,
        sum(softwareValue) AS totalSoftwareValue,
        any(total_contributor_count.totalContributorCount) AS totalContributorCount,
        any(total_contributor_count_30d.totalContributorCount30d) AS totalContributorCount30d,
        any(commits_count_30d.commitsCount30d) AS commitsCount30d,
        -- most_active_projects yields comma-separated JSON objects; wrap them into a JSON array
        concat('[', any(most_active_projects.mostActiveProjects), ']') AS mostActiveProjects,
        any(median_issue_close_time.medianIssueCloseTimeSeconds) AS medianIssueCloseTimeSeconds,
        any(median_issue_close_time_30d.medianIssueCloseTimeSeconds30d) AS medianIssueCloseTimeSeconds30d,
        any(median_pr_resolution_time.medianPrResolutionTimeSeconds) AS medianPrResolutionTimeSeconds,
        any(median_pr_resolution_time_30d.medianPrResolutionTimeSeconds30d) AS medianPrResolutionTimeSeconds30d,
        any(projects_with_github_pr.projectsWithGithubPrActivity) AS projectsWithGithubPrActivity,
        any(projects_with_github_issue.projectsWithGithubIssueActivity) AS projectsWithGithubIssueActivity
    FROM ai_projects
    CROSS JOIN total_contributor_count
    CROSS JOIN total_contributor_count_30d
    CROSS JOIN commits_count_30d
    CROSS JOIN most_active_projects
    CROSS JOIN median_issue_close_time
    CROSS JOIN median_issue_close_time_30d
    CROSS JOIN median_pr_resolution_time
    CROSS JOIN median_pr_resolution_time_30d
    CROSS JOIN projects_with_github_pr
    CROSS JOIN projects_with_github_issue

TYPE COPY
TARGET_DATASOURCE agentic_ai_momentum_glance_ds
COPY_MODE replace
COPY_SCHEDULE 0 2 * * *
36 changes: 36 additions & 0 deletions services/libs/tinybird/pipes/agentic_ai_projects_list.pipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
NODE ai_projects_list
DESCRIPTION >
    Exposes every column of `agentic_ai_projects_list_ds`, renaming camelCase columns to snake_case.

SQL >
    SELECT
        projects.projectId AS project_id,
        projects.name AS name,
        projects.slug AS slug,
        projects.githubRepoLink AS github_repo_link,
        projects.stars AS stars,
        projects.stars30d AS stars_30d,
        projects.forks AS forks,
        projects.forks30d AS forks_30d,
        projects.downloads AS downloads,
        projects.downloads30d AS downloads_30d,
        projects.dockerPulls AS docker_pulls,
        projects.dockerPulls30d AS docker_pulls_30d,
        projects.dependentRepos AS dependent_repos,
        projects.dependentPackages AS dependent_packages,
        projects.commits AS commits,
        projects.commits30d AS commits_30d,
        projects.contributors AS contributors,
        projects.contributors30d AS contributors_30d,
        projects.newContributors30d AS new_contributors_30d,
        projects.mergeRate AS merge_rate,
        projects.mergeRate30d AS merge_rate_30d,
        projects.prResolveTimeSeconds AS pr_resolve_time_seconds,
        projects.prResolveTimeSeconds30d AS pr_resolve_time_seconds_30d,
        projects.issueCloseTimeSeconds AS issue_close_time_seconds,
        projects.issueCloseTimeSeconds30d AS issue_close_time_seconds_30d,
        projects.issueResponseTimeSeconds AS issue_response_time_seconds,
        projects.issueResponseTimeSeconds30d AS issue_response_time_seconds_30d,
        projects.noResponseIssues AS no_response_issues,
        projects.noResponseIssues30d AS no_response_issues_30d,
        projects.vulnerabilities AS vulnerabilities,
        projects.vulnerabilities30d AS vulnerabilities_30d,
        projects.cocomoValue AS cocomo_value
    FROM agentic_ai_projects_list_ds AS projects
Comment thread
epipav marked this conversation as resolved.
Loading
Loading