Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 274 additions & 0 deletions .github/scripts/generate_release_notes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
#!/usr/bin/env python3
"""Generate customer-facing release notes from git commits using Bedrock tool use."""

import argparse
import json
import os
import re
import subprocess
import sys
from datetime import date

import boto3

# Bedrock model identifier; passed as ``modelId`` to ``invoke_model`` in invoke_bedrock().
MODEL_ID = "us.anthropic.claude-opus-4-6-v1"
Comment thread
karthikbekalp marked this conversation as resolved.

# Tool schema sent to the model in the "tools" list of the request body.
# invoke_bedrock() forces this tool through "tool_choice" and returns the
# "entries" array from the resulting tool_use block; no local function is
# dispatched for it. Each entry must carry a category (one of the enum values
# below), a description and a reference.
TOOL_DEFINITION = {
    "name": "emit_release_notes",
    "description": "Emit structured release note entries. Call this once with ALL entries.",
    "input_schema": {
        "type": "object",
        "properties": {
            "entries": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "category": {
                            "type": "string",
                            "enum": ["breaking", "features", "bug fixes", "deprecations"],
                        },
                        "description": {
                            "type": "string",
                            "description": "Customer-facing description of the change. Write from the user's perspective.",
                        },
                        "reference": {
                            "type": "string",
                            "description": "PR link like (#1234) or commit hash in backticks like (`abc1234`)",
                        },
                    },
                    "required": ["category", "description", "reference"],
                },
            }
        },
        "required": ["entries"],
    },
}

# System prompt template. build_system_prompt() fills the {repo_name} and
# {readme_section} placeholders with str.format(), so any literal braces added
# to this text must be doubled ("{{" / "}}").
SYSTEM_PROMPT = """\
You are writing customer-facing release notes for {repo_name}.

{readme_section}

You will receive raw git commit messages and PR descriptions. Your job is to call the emit_release_notes \
tool with structured entries that a customer would find useful.

What to INCLUDE (only changes a user of this package would notice):
- New CLI commands, flags, or options they can use
- New library APIs or behaviors they can rely on
- Bug fixes that affected their workflows
- Breaking changes to CLI behavior, config format, or public API signatures
- Deprecations of public APIs or CLI flags
- Performance improvements they would notice

What to EXCLUDE (never mention these even if the commit prefix suggests otherwise):
- CI/CD pipeline changes, GitHub Actions workflow updates, release process changes
- Test additions, test fixes, test infrastructure
- Internal refactors that don't change user-facing behavior
- Dependency bumps (dependabot, requirements updates)
- Documentation-only changes (docs, README, wiki, migration guides)
- Build infrastructure, installer build process, CodeBuild/CodePipeline changes
- Merge commits, chore commits, code quality/linting changes
- Changes to internal modules that aren't part of the public API
- Telemetry/analytics changes that are invisible to the user

When in doubt, ask: "Would a user of this package notice this change?" If no, skip it.

Writing style:
- Write from the user's perspective — what changed FOR THEM.
- For breaking changes, explain what the user needs to do differently.
- For features, explain what the user can now do.
- For bug fixes, explain what was broken and that it's now fixed.
- Keep descriptions concise — one or two sentences max.
- Use the PR number as reference when available (e.g. "(#1234)"), otherwise use short commit hash.
- Combine related commits into a single entry when they're part of the same feature/fix.
- ALWAYS call the tool. Never respond with plain text."""


def run_git(*args: str) -> str:
    """Run ``git`` with the given arguments and return its stripped stdout.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    command = ["git"]
    command.extend(args)
    completed = subprocess.run(command, check=True, text=True, capture_output=True)
    return completed.stdout.strip()


def get_latest_tag() -> str:
    """Return the output of ``git describe --tags --abbrev=0`` (the nearest tag name)."""
    describe_args = ("describe", "--tags", "--abbrev=0")
    return run_git(*describe_args)


def get_readme() -> str:
    """Return up to the first 1000 characters of the project README.

    Looks in the current working directory for ``README.md``, ``README.rst``,
    ``README.txt`` and ``README`` (in that order) and reads the first one that
    exists as a regular file.

    Returns:
        The README excerpt, or ``""`` when none of the candidates exists.
    """
    for name in ("README.md", "README.rst", "README.txt", "README"):
        if os.path.isfile(name):
            # Decode explicitly as UTF-8 rather than the locale's default
            # encoding, and replace undecodable bytes: the excerpt is only
            # optional prompt context, so a stray byte must not abort the run.
            with open(name, encoding="utf-8", errors="replace") as f:
                return f.read(1000)
    return ""


def get_commits_since_tag(tag: str) -> list[dict]:
    """Return the commits in ``<tag>..HEAD`` as dicts.

    ``git log`` is asked to emit ``hash NUL subject NUL body`` per commit,
    each record terminated by the ASCII record separator (0x1e), so subjects
    and bodies containing newlines are parsed unambiguously.

    Returns:
        One dict per commit with keys ``"hash"`` (first 7 characters of the
        full hash), ``"subject"`` and ``"body"`` (``""`` when absent), in the
        order git printed them. Empty list when git printed nothing.
    """
    raw_log = run_git(
        "log", f"{tag}..HEAD",
        "--pretty=format:%H%x00%s%x00%b%x1e",
    )
    if not raw_log:
        return []

    # Strip each record first; blank records (e.g. after the final separator)
    # are dropped, as are records that lack a subject field.
    records = (chunk.strip() for chunk in raw_log.split("\x1e"))
    fields_per_commit = (record.split("\x00", 2) for record in records if record)
    return [
        {
            "hash": fields[0][:7],
            "subject": fields[1],
            "body": fields[2] if len(fields) > 2 else "",
        }
        for fields in fields_per_commit
        if len(fields) >= 2
    ]


def get_pr_descriptions(commits: list[dict]) -> dict[str, str]:
    """Best-effort fetch of PR titles and bodies for PRs referenced by commits.

    Every ``#<digits>`` token found in a commit subject is treated as a
    candidate PR number and looked up with ``gh pr view``. Lookups that fail
    (non-zero exit, empty output, timeout, ``gh`` unavailable) are skipped so
    that release-note generation can continue with commit messages alone.

    Args:
        commits: Commit dicts; only the ``"subject"`` key is read.

    Returns:
        Mapping of PR number (digits only, as a string) to the PR title, a
        newline and the PR body, truncated to 2000 characters.
    """
    pr_numbers = {
        number
        for commit in commits
        for number in re.findall(r"#(\d+)", commit["subject"])
    }

    descriptions: dict[str, str] = {}
    # Sorted so the lookups (and any warnings) happen in a deterministic order.
    for pr in sorted(pr_numbers, key=int):
        try:
            result = subprocess.run(
                ["gh", "pr", "view", pr, "--json", "body,title", "-q", ".title + \"\\n\" + .body"],
                capture_output=True, text=True, timeout=10,
            )
        except FileNotFoundError:
            # Without the gh executable no lookup can succeed; stop trying.
            print("gh CLI not found; skipping PR descriptions.", file=sys.stderr)
            break
        except (OSError, subprocess.TimeoutExpired) as err:
            print(f"Could not fetch PR #{pr}: {err}", file=sys.stderr)
            continue

        text = result.stdout.strip()
        if result.returncode == 0 and text:
            descriptions[pr] = text[:2000]
    return descriptions


def build_input_text(commits: list[dict], pr_descriptions: dict[str, str]) -> str:
    """Render commits and their PR descriptions as the user message for the model.

    Each commit produces a ``COMMIT <hash>: <subject>`` line, an optional
    ``Body:`` line (first 500 characters of the body), one ``PR #<n>
    description:`` line (first 1000 characters) for every ``#<n>`` in the
    subject that has an entry in ``pr_descriptions``, and a blank separator
    line.

    Args:
        commits: Commit dicts with ``"hash"``, ``"subject"`` and ``"body"`` keys.
        pr_descriptions: Mapping of PR number (digits only) to description text.

    Returns:
        The rendered text; ``""`` when ``commits`` is empty.
    """
    lines = []
    for c in commits:
        lines.append(f"COMMIT {c['hash']}: {c['subject']}")
        if c["body"]:
            lines.append(f" Body: {c['body'][:500]}")
        # Consider every "#N" in the subject, not just the first: in a subject
        # such as "fix #12 (#34)" the first number is an issue with no fetched
        # description, and stopping there would drop PR 34's description.
        # dict.fromkeys de-duplicates while keeping first-seen order.
        for pr_num in dict.fromkeys(re.findall(r"#(\d+)", c["subject"])):
            if pr_num in pr_descriptions:
                lines.append(f" PR #{pr_num} description: {pr_descriptions[pr_num][:1000]}")
        lines.append("")

    return "\n".join(lines)


def build_system_prompt(repo_name: str) -> str:
    """Fill the SYSTEM_PROMPT template for ``repo_name``.

    When get_readme() returns text, it is embedded in a ``<readme>`` section
    for context; otherwise the README placeholder is filled with an empty
    string.
    """
    readme_section = ""
    readme = get_readme()
    if readme:
        readme_section = f"Here is the README for context on what this project does:\n<readme>\n{readme}\n</readme>"
    return SYSTEM_PROMPT.format(repo_name=repo_name, readme_section=readme_section)


def invoke_bedrock(input_text: str, region: str, repo_name: str) -> list[dict]:
    """Ask the Bedrock model for structured release-note entries.

    ``emit_release_notes`` (see TOOL_DEFINITION) is not backed by a local
    function. The tool definition only gives the model a JSON schema to fill
    in, and forcing it through ``tool_choice`` makes the reply carry the
    entries as the ``input`` of a ``tool_use`` block, which is returned as-is.
    Modeling changelog items this way constrains each entry to one of the
    schema's categories and requires a reference on it.

    Args:
        input_text: User message content (rendered commits and PR descriptions).
        region: AWS region for the ``bedrock-runtime`` client.
        repo_name: Repository name substituted into the system prompt.

    Returns:
        The ``entries`` list from the model's ``emit_release_notes`` tool call.

    Raises:
        RuntimeError: if the response has no ``emit_release_notes`` tool_use
            block, or that block's input has no ``entries`` list.
    """
    client = boto3.client("bedrock-runtime", region_name=region)

    request = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 4096,
        "system": build_system_prompt(repo_name),
        "tools": [TOOL_DEFINITION],
        "tool_choice": {"type": "tool", "name": "emit_release_notes"},
        "messages": [{"role": "user", "content": input_text}],
    }
    response = client.invoke_model(
        modelId=MODEL_ID,
        contentType="application/json",
        accept="application/json",
        body=json.dumps(request),
    )

    body = json.loads(response["body"].read())

    # A reply cut off at the token limit may carry an incomplete tool call;
    # surface that instead of silently publishing partial notes.
    if body.get("stop_reason") == "max_tokens":
        print(
            "Warning: Bedrock response hit max_tokens; release notes may be incomplete.",
            file=sys.stderr,
        )

    for block in body.get("content", []):
        if block.get("type") == "tool_use" and block.get("name") == "emit_release_notes":
            tool_input = block.get("input")
            entries = tool_input.get("entries") if isinstance(tool_input, dict) else None
            if not isinstance(entries, list):
                # Previously a bare KeyError with no context about the reply.
                raise RuntimeError(
                    f"Tool call is missing an 'entries' list: {json.dumps(body, indent=2)}"
                )
            return entries

    raise RuntimeError(f"No tool_use block in response: {json.dumps(body, indent=2)}")


def render_changelog(version: str, entries: list[dict]) -> str:
    """Render entries as a markdown changelog section.

    The output starts with a ``## <version> (<today's ISO date>)`` heading,
    followed by one ``###`` section per non-empty category in the fixed order
    breaking, deprecations, features, bug fixes. Entries whose category is not
    one of those four are ignored. Each entry becomes a ``* `` bullet with its
    reference appended when present. The text ends with a newline.
    """
    headings = {
        "breaking": "BREAKING CHANGES",
        "deprecations": "DEPRECATIONS",
        "features": "Features",
        "bug fixes": "Bug Fixes",
    }

    # Bucket entries by category, preserving their incoming order.
    grouped: dict[str, list[dict]] = {category: [] for category in headings}
    for entry in entries:
        bucket = grouped.get(entry["category"])
        if bucket is not None:
            bucket.append(entry)

    out = [f"## {version} ({date.today().isoformat()})"]
    for category, heading in headings.items():
        bucket = grouped[category]
        if bucket:
            out += ["", f"### {heading}"]
            for item in bucket:
                reference = item.get("reference")
                suffix = f" {reference}" if reference else ""
                out.append(f"* {item['description']}{suffix}")

    out.append("")
    return "\n".join(out)


def main():
    """CLI entry point: collect commits since a tag and print release notes.

    Resolves the repository name (``--repo`` or the ``origin`` remote URL) and
    the starting tag (``--since`` or the latest tag), gathers commits and PR
    descriptions, then either prints the model input (``--dry-run``), the raw
    entries as JSON (``--json``) or the rendered changelog section. Progress
    messages go to stderr so stdout carries only the result.
    """
    parser = argparse.ArgumentParser(description="Generate release notes using Bedrock")
    parser.add_argument("version", nargs="?", default="UNRELEASED")
    parser.add_argument("--repo", default=None, help="Repository name (default: inferred from git remote)")
    parser.add_argument("--since", help="Git tag to diff from (default: latest tag)")
    parser.add_argument("--region", default="us-west-2", help="AWS region for Bedrock")
    parser.add_argument("--json", action="store_true", help="Output raw JSON from tool call")
    parser.add_argument("--dry-run", action="store_true", help="Show input to LLM without calling Bedrock")
    args = parser.parse_args()

    if args.repo:
        repo_name = args.repo
    else:
        repo_name = _repo_name_from_remote(run_git("remote", "get-url", "origin"))

    tag = args.since or get_latest_tag()
    print(f"Generating release notes for {repo_name} since {tag}...", file=sys.stderr)

    commits = get_commits_since_tag(tag)
    if not commits:
        print("No commits since last tag.", file=sys.stderr)
        return

    print(f"Found {len(commits)} commits. Fetching PR descriptions...", file=sys.stderr)
    pr_descriptions = get_pr_descriptions(commits)
    print(f"Fetched {len(pr_descriptions)} PR descriptions.", file=sys.stderr)

    input_text = build_input_text(commits, pr_descriptions)

    if args.dry_run:
        print(input_text)
        return

    print("Invoking Bedrock...", file=sys.stderr)
    entries = invoke_bedrock(input_text, args.region, repo_name)

    if args.json:
        print(json.dumps(entries, indent=2))
    else:
        print(render_changelog(args.version, entries))


def _repo_name_from_remote(remote: str) -> str:
    """Return the repository name: the last path segment of a remote URL, without ``.git``."""
    # str.rstrip(".git") strips any trailing run of the characters ".", "g",
    # "i" and "t" (so "toolkit.git" became "toolk"); removesuffix drops only a
    # literal ".git". A trailing "/" is removed first so it cannot hide the name.
    return remote.rstrip("/").removesuffix(".git").rsplit("/", 1)[-1]


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
41 changes: 32 additions & 9 deletions .github/workflows/reusable_bump.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ jobs:
runs-on: ubuntu-latest
environment: release
permissions:
id-token: write
contents: write
pull-requests: write
steps:
Expand All @@ -24,16 +25,34 @@ jobs:
fetch-depth: 0
token: ${{ secrets.CI_TOKEN }}

- name: Checkout release notes script
uses: actions/checkout@v4
with:
repository: aws-deadline/.github
ref: mainline
path: .dot-github
sparse-checkout: .github/scripts/generate_release_notes.py

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.9'
python-version: '3.13'

- name: Install dependencies
run: pip install boto3

- name: ConfigureGit
run: |
git config --local user.email ${{secrets.EMAIL}}
git config --local user.name ${{secrets.USER}}

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: ${{ secrets.AWS_PGP_KEY_SECRET_ROLE }}
aws-region: us-west-2
mask-aws-account-id: true

- name: NextSemver
run: |
BUMP_ARGS=""
Expand All @@ -56,13 +75,23 @@ jobs:

- name: GenerateChangeLog
run: |
# Grab the new version's changelog and prepend it to the original changelog contents
python .github/scripts/get_latest_changelog.py > NEW_LOG.md
PREV_TAG=$(git describe --tags --abbrev=0)
NOTES=$(python .dot-github/.github/scripts/generate_release_notes.py "$NEXT_SEMVER" --since "$PREV_TAG" --repo "${{ github.event.repository.name }}")

# Prepend new release notes to the original changelog
echo "$NOTES" > NEW_LOG.md
cat NEW_LOG.md CHANGELOG.bak.md > CHANGELOG.md
rm NEW_LOG.md

git add CHANGELOG.md

# Save for PR body
{
echo 'RELEASE_NOTES<<EOF'
echo "$NOTES"
echo 'EOF'
} >> $GITHUB_ENV

# A precommit hook into the GitHub Action workflow.
# If the action .github/actions/bump_precommit_hook exists in the repository
# that is using this workflow, then this will run the workflow defined in that file.
Expand All @@ -77,12 +106,6 @@ jobs:
run: |
git commit -sm "chore(release): $NEXT_SEMVER"

{
echo 'RELEASE_NOTES<<EOF'
python .github/scripts/get_latest_changelog.py
echo EOF
} >> $GITHUB_ENV

- name: PushPR
env:
GH_TOKEN: ${{ secrets.CI_TOKEN }}
Expand Down
Loading