From b6beb84df96449e98f6d3f970c252a6880393207 Mon Sep 17 00:00:00 2001 From: timl Date: Wed, 15 Oct 2025 19:16:15 +0800 Subject: [PATCH 1/3] Support changelog for the new branch model: 1, Retrieve commit hashes for a release after enabling the new branch model. 2, Create a query to fetch pull request (PR) information from GitHub using commit hashes. 3, Support retrieving PR changelogs for both the old and new branch models. Signed-off-by: timl --- scripts/generate-changelog | 136 ++++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 2 deletions(-) diff --git a/scripts/generate-changelog b/scripts/generate-changelog index f62d0ebf044..0d50630e721 100755 --- a/scripts/generate-changelog +++ b/scripts/generate-changelog @@ -55,6 +55,7 @@ Usage: """ import os import sys +import subprocess from argparse import ArgumentParser from collections import OrderedDict from datetime import date, datetime @@ -77,6 +78,8 @@ LABEL_WONTFIX, LABEL_INVALID, LABEL_DUPLICATE = 'wontfix', 'invalid', 'duplicate LABEL_BUG = 'bug' LABEL_PERFORMANCE, LABEL_SHUFFLE = 'performance', 'shuffle' LABEL_FEATURE, LABEL_SQL = 'feature request', 'SQL' +# The release version from which the release branch changes (e.g., branch-YY.MM --> release/YY.MM) +FROM_RELEASE = '25.12' # Queries query_pr = """ query ($baseRefName: String!, $after: String) { @@ -148,7 +151,111 @@ query ($after: String, $since: DateTime) { } } """ +query_pr_by_commit = """ +query ($sha: String!) { + repository(name: "spark-rapids", owner: "NVIDIA") { + commit: object(expression: $sha) { + ... on Commit { + associatedPullRequests(first: 10) { + edges { + node { + title + number + state + url + baseRefName + labels(first: 10) { + nodes { + name + } + } + mergedAt + projectItems(first: 10) { + nodes { + roadmap: fieldValueByName(name: "Roadmap") { + ... on ProjectV2ItemFieldSingleSelectValue { + name + } + } + } + } + } + } + } + } + } + } +} +""" +# Get the previous release version string(yy.mm format, 2 months before the current version) +# from the current version, e.g. YY.MM2[current] --> YY.MM1[previous] +# param current_ver: the current version, e.g. YY.MM2 +# return: the previous version, e.g. YY.MM1, +def get_prev_release_version(current_ver: str): + year, month = map(int, current_ver.split(".")) + if month > 2: + new_year = year + new_month = month - 2 + else: + new_year = year - 1 + new_month = month + 10 + prev_ver = f"{new_year:02d}.{new_month:02d}" + return prev_ver + +# Get the commit hashes between two branches or release tags. +# param releases: set of release versions, e.g. {'YY.MM2', 'YY.MM1'} +# return: dict of commit hashes, e.g. {YY.MM2: [sha1, sha2, ...], YY.MM1: [shaX, shaY, ...]} +def get_commits(releases: set): + rel_list = list(releases) + ver_commits = {} + count = len(rel_list) # descending version order assured + for i, to_rel in enumerate(rel_list): + to_branch = f"origin/release/{to_rel}" + # commits of releases[YY.MM2, YY.MM1] --> git log "YY.MM2..YY.MM1" for YY.MM2, "YY.MM1..YY.MM0" for YY.MM1 + if i + 1 < count: + from_rel = rel_list[i + 1] + else: + from_rel = get_prev_release_version(to_rel) + based_rel = float(from_rel) + if based_rel < float(FROM_RELEASE): + from_branch = f"origin/branch-{from_rel}" + else: + from_branch = f"origin/release/{from_rel}" + + # Get all the commit hashes, excluding those commits whose title contains '[bot]' + git_log_args = [ + "git", "--no-pager", "log", + f"{from_branch}..{to_branch}", "--pretty=format:%h", + "--grep=[bot]", "-F", "--invert-grep" + ] + + # Use check=True to raise exception if git fails, making errors explicit + result = subprocess.run(git_log_args, capture_output=True, text=True, check=True) + + commits = result.stdout.splitlines() + ver_commits[to_rel] = commits + return ver_commits + +# Get the PR list from commit hashes +# param ver_commits, e.g. {v1: [sha1, sha2, ...], v2: [shaX, shaY, ...]} +# param token: the token for the API +# return: list of PRs associated with the commit hashes, e.g. [{PR1 info}, {PR2 info}, ...] +def get_pr_via_commits(ver_commits: set, token: str): + pr_list = [] + for version, commits in ver_commits.items(): + for sha in commits: + res = post(query=query_pr_by_commit, token=token, variable={'sha': sha}) + try: + pr_item = res.json()['data']['repository']['commit']['associatedPullRequests']['edges'][0]['node'] + pr_item['ver'] = version + # Handle the case of multiple commits being associated with the same PR + if pr_item not in pr_list and pr_item['mergedAt'] is not None: + pr_list.append(pr_item) + except Exception: + print(f"Commit sha '{sha}' does not have the associated Pull Request") + continue + return pr_list def process_changelog(resource_type: str, changelog: dict, releases: set, projects: set, token: str): if resource_type == PULL_REQUESTS: @@ -175,6 +282,11 @@ def process_changelog(resource_type: str, changelog: dict, releases: set, projec ver = item["projectItems"]["nodes"][0]['roadmap']['name'] project = f"{RELEASE} {ver}" + # Overwrite project version after the {FROM_RELEASE} if provided + if item.get('ver') is not None: + ver = item['ver'] + project = f"{RELEASE} {ver}" + if not release_project(project, projects): continue @@ -207,11 +319,29 @@ def process_changelog(resource_type: str, changelog: dict, releases: set, projec }) +# Get the PRs based on the release versions def process_pr(releases: set, token: str): pr = [] - for rel in releases: + current_ver = list(releases)[0] + current_ver_float = float(current_ver) + based_rel = float(FROM_RELEASE) + + # Both releases are after {FROM_RELEASE} + if current_ver_float > based_rel: + ver_commits = get_commits(releases) + pr = get_pr_via_commits(ver_commits, token) + # One release is the {FROM_RELEASE}, the other is before the {FROM_RELEASE} + elif current_ver_float == based_rel: + ver_commits = get_commits({FROM_RELEASE}) + pr = get_pr_via_commits(ver_commits, token) + prev_ver = get_prev_release_version(current_ver=FROM_RELEASE) pr.extend(fetch(resource_type=PULL_REQUESTS, token=token, - variables={'baseRefName': f"branch-{rel}"})) + variables={'baseRefName': f"branch-{prev_ver}"})) + # Both releases are before the {FROM_RELEASE} + else: + for rel in releases: + pr.extend(fetch(resource_type=PULL_REQUESTS, token=token, + variables={'baseRefName': f"branch-{rel}"})) return pr @@ -301,6 +431,8 @@ def main(rels: str, path: str, token: str): try: changelog = {} # changelog dict releases = {x.strip() for x in rels.split(',')} + # Sort releases in descending order for the follow-up operations + releases = sorted(releases, reverse=True) projects = {f"{RELEASE} {rel}" for rel in releases} print('Processing pull requests ...') From ec8f8bf7d2270846acc73b142cd6deb84b93746c Mon Sep 17 00:00:00 2001 From: timl Date: Wed, 29 Oct 2025 12:05:17 +0800 Subject: [PATCH 2/3] Update for the 'Copilot' review suggestions Signed-off-by: timl --- scripts/generate-changelog | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/generate-changelog b/scripts/generate-changelog index 0d50630e721..cc881b8e34f 100755 --- a/scripts/generate-changelog +++ b/scripts/generate-changelog @@ -188,7 +188,7 @@ query ($sha: String!) { } """ -# Get the previous release version string(yy.mm format, 2 months before the current version) +# Get the previous release version string(YY.MM format, 2 months before the current version) # from the current version, e.g. YY.MM2[current] --> YY.MM1[previous] # param current_ver: the current version, e.g. YY.MM2 # return: the previous version, e.g. YY.MM1, @@ -252,8 +252,8 @@ def get_pr_via_commits(ver_commits: set, token: str): # Handle the case of multiple commits being associated with the same PR if pr_item not in pr_list and pr_item['mergedAt'] is not None: pr_list.append(pr_item) - except Exception: - print(f"Commit sha '{sha}' does not have the associated Pull Request") + except Exception as e: + print(f"Exception: {e}, commit sha '{sha}' does not have the associated Pull Request") continue return pr_list From 10e1f7ab2a5ea4f96be54ba51415578a7ad55e8b Mon Sep 17 00:00:00 2001 From: timl Date: Fri, 31 Oct 2025 16:28:09 +0800 Subject: [PATCH 3/3] Add a note indicating that only the last two releases are included in the changelog Signed-off-by: timl --- scripts/generate-changelog | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/generate-changelog b/scripts/generate-changelog index cc881b8e34f..464d44add24 100755 --- a/scripts/generate-changelog +++ b/scripts/generate-changelog @@ -326,6 +326,7 @@ def process_pr(releases: set, token: str): current_ver_float = float(current_ver) based_rel = float(FROM_RELEASE) + # Note: only the last 2 releases are supported/included in the changelog # Both releases are after {FROM_RELEASE} if current_ver_float > based_rel: ver_commits = get_commits(releases)