diff --git a/github_metrics/metrics/merge_rate.py b/github_metrics/metrics/merge_rate.py
index 2e04872..8688723 100644
--- a/github_metrics/metrics/merge_rate.py
+++ b/github_metrics/metrics/merge_rate.py
@@ -6,7 +6,8 @@ def format_pr_list(pr_list):
     return [
         {
             "author": get_author_login(pr),
-            "merged_at": extract_datetime_or_none(pr.get("mergedAt"))
+            "merged_at": extract_datetime_or_none(pr.get("mergedAt")),
+            "created_at": extract_datetime_or_none(pr.get("createdAt"))
             if pr.get("mergedAt")
             else None,
         }
diff --git a/github_metrics/metrics/open_to_merge.py b/github_metrics/metrics/open_to_merge.py
index c7cabcd..547a18a 100644
--- a/github_metrics/metrics/open_to_merge.py
+++ b/github_metrics/metrics/open_to_merge.py
@@ -1,4 +1,5 @@
 import numpy
+import arrow
 
 from github_metrics.common import extract_datetime_or_none, get_author_login
 from github_metrics.helpers import (
@@ -24,8 +25,14 @@ def format_pr_list(pr_list):
             "title": pr["title"],
             "author": get_author_login(pr),
             "created_at": extract_datetime_or_none(pr.get("createdAt")),
-            "merged_at": extract_datetime_or_none(pr.get("mergedAt"))
-            if pr.get("mergedAt")
+            "merged_at": extract_datetime_or_none(pr.get("mergedAt")),
+            "duration_in_hours": format_timedelta_to_hours(
+                get_time_without_weekend(
+                    arrow.get(pr["createdAt"]),
+                    arrow.get(pr["mergedAt"])
+                )
+            )
+            if extract_datetime_or_none(pr.get("createdAt")) and extract_datetime_or_none(pr.get("mergedAt"))
             else None,
         }
         for pr in pr_list
@@ -63,6 +70,9 @@ def get_open_to_merge_time_data(
         "mean": mean,
         "median": median,
         "percentile_95": percentile,
+        "mean_duration_in_hours": mean.total_seconds() / 3600,
+        "median_duration_in_hours": median.total_seconds() / 3600,
+        "percentile_95_duration_in_hours": percentile.total_seconds() / 3600,
         "total_prs": merged_pr_list,
         "merged_pr_rate": merged_pr_rate,
     }
diff --git a/github_metrics/metrics/pr_size.py b/github_metrics/metrics/pr_size.py
index 7fb427c..afd2ec0 100644
--- a/github_metrics/metrics/pr_size.py
+++ b/github_metrics/metrics/pr_size.py
@@ -1,3 +1,4 @@
+from github_metrics.common import get_author_login
 import numpy
 
 from github_metrics.helpers import filter_valid_prs
@@ -8,6 +9,7 @@ def format_pr_list(pr_list):
         {
             "additions": pr["additions"],
             "deletions": pr["deletions"],
+            "author": get_author_login(pr),
         }
         for pr in pr_list
     ]
diff --git a/github_metrics/metrics/time_to_merge.py b/github_metrics/metrics/time_to_merge.py
index 89e796d..b93b13c 100644
--- a/github_metrics/metrics/time_to_merge.py
+++ b/github_metrics/metrics/time_to_merge.py
@@ -80,11 +80,13 @@ def get_time_to_merge_data(
     mean = numpy.mean(time_to_merge_list)
     median = numpy.median(time_to_merge_list)
     percentile = numpy.percentile(time_to_merge_list, 95)
-
     return {
         "mean": mean,
         "median": median,
         "percentile_95": percentile,
+        "mean_duration_in_hours": mean.total_seconds() / 3600,
+        "median_duration_in_hours": median.total_seconds() / 3600,
+        "percentile_95_duration_in_hours": percentile.total_seconds() / 3600,
         "merged_prs": merged_prs,
     }
 
diff --git a/github_metrics/metrics/time_to_open.py b/github_metrics/metrics/time_to_open.py
index 922d45f..2736dce 100644
--- a/github_metrics/metrics/time_to_open.py
+++ b/github_metrics/metrics/time_to_open.py
@@ -70,6 +70,9 @@ def get_time_to_open_data(
         "mean": mean,
         "median": median,
         "percentile_95": percentile,
+        "mean_duration_in_hours": mean.total_seconds() / 3600,
+        "median_duration_in_hours": median.total_seconds() / 3600,
+        "percentile_95_duration_in_hours": percentile.total_seconds() / 3600,
         "total_prs": formatted_pr_list,
         }
 
diff --git a/github_metrics/metrics/time_to_review.py b/github_metrics/metrics/time_to_review.py
index a3aa7b6..d658482 100644
--- a/github_metrics/metrics/time_to_review.py
+++ b/github_metrics/metrics/time_to_review.py
@@ -22,6 +22,16 @@ def get_reviews_from_pr(pr):
     return reviews
 
 
+def get_comments_from_pr(pr):
+    comments_root = pr.get("comments")
+
+    if not comments_root:
+        return []
+
+    comments = comments_root.get("nodes", [])
+    return comments
+
+
 def get_first_review(pr):
     pr_author_login = get_author_login(pr)
 
@@ -36,6 +46,53 @@
     return different_author_reviews[0]
 
 
+def get_comments_from_pr_review(pr):
+    reviews_root = pr.get("reviews")
+    if not reviews_root:
+        return []
+
+    nodes = reviews_root.get("nodes", [])
+    edges = reviews_root.get("edges", [])
+    edge_comments = [
+        {"login": get_author_login(e["node"]), "comment": e["node"]["body"]}
+        for e in edges if e.get("node", {}).get("body")
+    ]
+    node_comments = [
+        {"login": get_author_login(c), "comment": c["body"]}
+        for n in nodes
+        for c in n.get("comments", {"nodes": []}).get("nodes") if c.get("body")
+    ]
+    return node_comments + edge_comments
+
+
+def get_reviewers_and_comments(pr):
+    pr_author_login = get_author_login(pr)
+    comments = get_comments_from_pr(pr)
+    comments_from_pr_review = get_comments_from_pr_review(pr)
+
+    different_author_reviews = [
+        r["login"] for r in comments_from_pr_review if pr_author_login != r["login"]
+    ]
+    different_author_comments = [
+        get_author_login(r) for r in comments if pr_author_login != get_author_login(r)
+    ]
+
+    reviewers = different_author_comments + different_author_reviews
+    reviewers = list(set(reviewers))
+    if not reviewers:
+        return
+
+    reviewers_and_comments = []
+    for reviewer in reviewers:
+        reviewer_comments = [c.get("body") for c in comments if get_author_login(c) == reviewer]
+        reviewer_pr_reviews = [i["comment"] for i in comments_from_pr_review if i["login"] == reviewer]
+
+        reviewers_and_comments.append({
+            "login": reviewer,
+            "comments": reviewer_pr_reviews + reviewer_comments,
+        })
+    return reviewers_and_comments
+
 def hours_without_review(pr):
     open_date = extract_datetime_or_none(pr["created_at"])
 
@@ -63,6 +120,14 @@ def format_pr_list(pr_list):
             )
             if get_first_review(pr)
             else None,
+            "reviewers": get_reviewers_and_comments(pr),
+            "duration_in_hours": format_timedelta_to_hours(
+                get_time_without_weekend(
+                    arrow.get(pr["createdAt"]),
+                    arrow.get(get_first_review(pr).get("createdAt"))
+                )
+            ) if get_first_review(pr) and pr["createdAt"]
+            else None
         }
         for pr in pr_list
     ]
@@ -106,6 +171,9 @@ def get_time_to_review_data(
         "mean": mean,
         "median": median,
         "percentile_95": percentile,
+        "mean_duration_in_hours": mean.total_seconds() / 3600,
+        "median_duration_in_hours": median.total_seconds() / 3600,
+        "percentile_95_duration_in_hours": percentile.total_seconds() / 3600,
         "total_prs": formatted_pr_list,
         "unreviewed_prs": unreviewed_prs,
         "prs_over_24h": prs_over_24h,
diff --git a/github_metrics/request.py b/github_metrics/request.py
index 7efd219..b8621ca 100644
--- a/github_metrics/request.py
+++ b/github_metrics/request.py
@@ -1,3 +1,4 @@
+import random
 from time import sleep
 
 import requests
@@ -44,14 +45,46 @@ def format_request_for_github(cursor=None):
                 headRefName
                 additions
                 deletions
+                comments(first: 100) {{
+                    nodes {{
+                        author {{
+                            login
+                        }}
+                        body
+                    }}
+                    edges {{
+                        node {{
+                            author {{
+                                login
+                            }}
+                            body
+                        }}
+                    }}
+                }}
                 reviews(first: 10) {{
                     nodes {{
                         createdAt
                         state
+                        comments(first: 100) {{
+                            nodes {{
+                                body
+                                author {{
+                                    login
+                                }}
+                            }}
+                        }}
                         author {{
                             login
                         }}
                     }}
+                    edges {{
+                        node {{
+                            body
+                            author {{
+                                login
+                            }}
+                        }}
+                    }}
                 }}
                 author {{
                     login
@@ -93,8 +126,10 @@ def fetch_prs_between(start_date, end_date):
     current_date = None
     cursor = None
     has_next_page = True
-
+    timeout = [25, 15, 5, 30]
     while has_next_page and (not current_date or current_date > start_date):
+        i = random.choice(timeout)
+        sleep(i)
        response = requests.post(
            "https://api.github.com/graphql",
            auth=HTTPBasicAuth(GITHUB_LOGIN, GITHUB_TOKEN),
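Note: the new `duration_in_hours` and `*_duration_in_hours` fields above lean on `get_time_without_weekend` and `format_timedelta_to_hours`, which the diff imports from `github_metrics.helpers` but does not show. The sketch below is only an illustration of what such helpers are assumed to do (skip Saturday/Sunday time between two `arrow` timestamps and express a `timedelta` as fractional hours); it is not the repository's actual implementation.

```python
# Minimal sketch, assuming the helpers exclude weekend days and report hours.
from datetime import timedelta

import arrow


def get_time_without_weekend(start, end):
    """Return the timedelta between two arrow datetimes, skipping weekend days."""
    if end < start:
        return timedelta(0)
    total = end - start
    # Walk each calendar day and subtract any overlap that falls on Sat (5) or Sun (6).
    day = start.floor("day")
    while day <= end:
        if day.weekday() >= 5:
            overlap_start = max(start, day)
            overlap_end = min(end, day.shift(days=1))
            if overlap_end > overlap_start:
                total -= overlap_end - overlap_start
        day = day.shift(days=1)
    return total


def format_timedelta_to_hours(delta):
    """Express a timedelta as a float number of hours, rounded to two places."""
    return round(delta.total_seconds() / 3600, 2)


if __name__ == "__main__":
    opened = arrow.get("2023-03-03T17:00:00+00:00")  # Friday afternoon
    merged = arrow.get("2023-03-06T09:00:00+00:00")  # Monday morning
    print(format_timedelta_to_hours(get_time_without_weekend(opened, merged)))  # 16.0
```

With the hypothetical timestamps in the usage example (a PR opened Friday 17:00 UTC and merged Monday 09:00 UTC), the sketch reports 16.0 hours because the 48 weekend hours are excluded, which matches the intent of the `duration_in_hours` keys added in `open_to_merge.py` and `time_to_review.py`.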