Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 135 additions & 2 deletions scripts/generate-changelog
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Usage:
"""
import os
import sys
import subprocess
from argparse import ArgumentParser
from collections import OrderedDict
from datetime import date, datetime
Expand All @@ -77,6 +78,8 @@ LABEL_WONTFIX, LABEL_INVALID, LABEL_DUPLICATE = 'wontfix', 'invalid', 'duplicate
LABEL_BUG = 'bug'
LABEL_PERFORMANCE, LABEL_SHUFFLE = 'performance', 'shuffle'
LABEL_FEATURE, LABEL_SQL = 'feature request', 'SQL'
# The first release whose branch uses the new naming scheme (release/YY.MM); earlier releases use branch-YY.MM
FROM_RELEASE = '25.12'
# Queries
query_pr = """
query ($baseRefName: String!, $after: String) {
Expand Down Expand Up @@ -148,7 +151,111 @@ query ($after: String, $since: DateTime) {
}
}
"""
# GraphQL query resolving a commit of NVIDIA/spark-rapids to its associated pull requests.
# variable $sha: the git expression (commit hash) identifying the commit
# result: up to 10 associated PRs, each with title, number, state, url, baseRefName, mergedAt,
#         up to 10 label names, and the "Roadmap" single-select value (aliased 'roadmap')
#         of up to 10 project items
query_pr_by_commit = """
query ($sha: String!) {
repository(name: "spark-rapids", owner: "NVIDIA") {
commit: object(expression: $sha) {
... on Commit {
associatedPullRequests(first: 10) {
edges {
node {
title
number
state
url
baseRefName
labels(first: 10) {
nodes {
name
}
}
mergedAt
projectItems(first: 10) {
nodes {
roadmap: fieldValueByName(name: "Roadmap") {
... on ProjectV2ItemFieldSingleSelectValue {
name
}
}
}
}
}
}
}
}
}
}
}
"""

# Compute the release that precedes the given one. Releases are named YY.MM and are
# two months apart, so the predecessor of YY.MM2 is YY.MM1 with MM1 = MM2 - 2.
# param current_ver: the current version string, e.g. '25.12'
# return: the previous version string, zero-padded to YY.MM, e.g. '25.10'
def get_prev_release_version(current_ver: str):
    yy, mm = (int(part) for part in current_ver.split("."))
    # Months 01 and 02 wrap around into the last two months of the previous year
    prev_yy, prev_mm = (yy, mm - 2) if mm > 2 else (yy - 1, mm + 10)
    return f"{prev_yy:02d}.{prev_mm:02d}"

# Collect, for every release, the commits that are on its release branch but not on the
# branch of the release before it.
# param releases: release versions in descending order, e.g. ['YY.MM2', 'YY.MM1']
# return: dict of abbreviated commit hashes per release,
#         e.g. {YY.MM2: [sha1, sha2, ...], YY.MM1: [shaX, shaY, ...]}
# raises: subprocess.CalledProcessError when 'git log' exits with a non-zero status
def get_commits(releases: set):
    ordered = list(releases)
    # Marker paired with the last release: it has no older neighbour in the list
    no_successor = object()
    commits_by_release = {}
    for to_rel, successor in zip(ordered, ordered[1:] + [no_successor]):
        # The baseline is the next (older) release in the list; for the last entry it is
        # derived from the version number instead
        if successor is no_successor:
            from_rel = get_prev_release_version(to_rel)
        else:
            from_rel = successor

        # Releases older than FROM_RELEASE live on 'branch-YY.MM', newer ones on 'release/YY.MM'
        if float(from_rel) < float(FROM_RELEASE):
            from_branch = f"origin/branch-{from_rel}"
        else:
            from_branch = f"origin/release/{from_rel}"

        # '-F --grep=[bot] --invert-grep' drops the commits whose message contains the
        # literal text '[bot]'; '%h' prints one abbreviated hash per line.
        # check=True makes a git failure raise instead of silently yielding no commits.
        git_log = subprocess.run(
            [
                "git", "--no-pager", "log",
                f"{from_branch}..origin/release/{to_rel}", "--pretty=format:%h",
                "--grep=[bot]", "-F", "--invert-grep"
            ],
            capture_output=True, text=True, check=True)

        commits_by_release[to_rel] = git_log.stdout.splitlines()
    return commits_by_release

# Get the PR list from commit hashes
# param ver_commits: dict of commit hashes per version, e.g. {v1: [sha1, sha2, ...], v2: [shaX, shaY, ...]}
# param token: the token for the API
# return: list of merged PRs associated with the commit hashes, e.g. [{PR1 info}, {PR2 info}, ...];
#         each PR dict gets an extra 'ver' key holding the version its commit was listed under
def get_pr_via_commits(ver_commits: dict, token: str):
    pr_list = []
    # (version, PR number) pairs already collected, so that several commits belonging
    # to the same PR add it only once without rescanning pr_list
    seen = set()
    for version, commits in ver_commits.items():
        for sha in commits:
            res = post(query=query_pr_by_commit, token=token, variable={'sha': sha})
            try:
                # Only the first associated PR of the commit is considered
                pr_item = res.json()['data']['repository']['commit']['associatedPullRequests']['edges'][0]['node']
                pr_item['ver'] = version
                pr_key = (version, pr_item['number'])
                # PRs that were never merged (mergedAt is None) are left out
                if pr_key not in seen and pr_item['mergedAt'] is not None:
                    seen.add(pr_key)
                    pr_list.append(pr_item)
            except Exception as e:
                # Best effort: a commit without a usable PR entry is reported and skipped
                print(f"Exception: {e}, commit sha '{sha}' does not have the associated Pull Request")
                continue
    return pr_list

def process_changelog(resource_type: str, changelog: dict, releases: set, projects: set, token: str):
if resource_type == PULL_REQUESTS:
Expand All @@ -175,6 +282,11 @@ def process_changelog(resource_type: str, changelog: dict, releases: set, projec
ver = item["projectItems"]["nodes"][0]['roadmap']['name']
project = f"{RELEASE} {ver}"

# Overwrite project version after the {FROM_RELEASE} if provided
if item.get('ver') is not None:
ver = item['ver']
project = f"{RELEASE} {ver}"

if not release_project(project, projects):
continue

Expand Down Expand Up @@ -207,11 +319,30 @@ def process_changelog(resource_type: str, changelog: dict, releases: set, projec
})


# Get the PRs based on the release versions
# param releases: the release versions to collect PRs for, e.g. ['YY.MM2', 'YY.MM1']
# param token: the token for the API
# return: list of PRs; empty when no release is given
def process_pr(releases: set, token: str):
    pr = []
    # Nothing requested, nothing to fetch
    if not releases:
        return pr

    # Sort locally (newest first) so the result does not depend on the caller's ordering
    ordered = sorted(releases, reverse=True)
    current_ver_float = float(ordered[0])
    based_rel = float(FROM_RELEASE)

    # Note: only the last 2 releases are supported/included in the changelog
    # Both releases are after {FROM_RELEASE}
    if current_ver_float > based_rel:
        ver_commits = get_commits(ordered)
        pr = get_pr_via_commits(ver_commits, token)
    # One release is the {FROM_RELEASE}, the other is before the {FROM_RELEASE}
    elif current_ver_float == based_rel:
        ver_commits = get_commits({FROM_RELEASE})
        pr = get_pr_via_commits(ver_commits, token)
        prev_ver = get_prev_release_version(current_ver=FROM_RELEASE)
        pr.extend(fetch(resource_type=PULL_REQUESTS, token=token,
                        variables={'baseRefName': f"branch-{prev_ver}"}))
    # Both releases are before the {FROM_RELEASE}
    else:
        for rel in ordered:
            pr.extend(fetch(resource_type=PULL_REQUESTS, token=token,
                            variables={'baseRefName': f"branch-{rel}"}))
    return pr


Expand Down Expand Up @@ -301,6 +432,8 @@ def main(rels: str, path: str, token: str):
try:
changelog = {} # changelog dict
releases = {x.strip() for x in rels.split(',')}
# Sort releases in descending order for the follow-up operations
releases = sorted(releases, reverse=True)
Copy link

Copilot AI Sep 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Converting releases from a set to a sorted list changes its type, but the variable name and usage throughout the codebase still treat it as a set. This could cause issues in functions that expect a set type. Consider using a different variable name like sorted_releases for the sorted list.

Copilot uses AI. Check for mistakes.
projects = {f"{RELEASE} {rel}" for rel in releases}

print('Processing pull requests ...')
Expand Down