3 changes: 2 additions & 1 deletion github_metrics/metrics/merge_rate.py
@@ -6,7 +6,8 @@ def format_pr_list(pr_list):
     return [
         {
             "author": get_author_login(pr),
-            "merged_at": extract_datetime_or_none(pr.get("mergedAt"))
+            "merged_at": extract_datetime_or_none(pr.get("mergedAt")),
+            "created_at": extract_datetime_or_none(pr.get("createdAt"))
             if pr.get("mergedAt")
             else None,
         }
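Both the existing merged_at field and the new created_at field go through extract_datetime_or_none, which lives in github_metrics/common.py and is not shown in this diff. A rough sketch of what such a helper presumably does (the project's real implementation may differ):

from datetime import datetime

def extract_datetime_or_none(value):
    # GitHub's GraphQL API returns ISO-8601 timestamps such as "2021-03-01T12:00:00Z";
    # a missing or null field should simply come back as None.
    if not value:
        return None
    return datetime.fromisoformat(value.replace("Z", "+00:00"))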
14 changes: 12 additions & 2 deletions github_metrics/metrics/open_to_merge.py
@@ -1,4 +1,5 @@
 import numpy
+import arrow
 
 from github_metrics.common import extract_datetime_or_none, get_author_login
 from github_metrics.helpers import (
@@ -24,8 +25,14 @@ def format_pr_list(pr_list):
             "title": pr["title"],
             "author": get_author_login(pr),
             "created_at": extract_datetime_or_none(pr.get("createdAt")),
-            "merged_at": extract_datetime_or_none(pr.get("mergedAt"))
-            if pr.get("mergedAt")
+            "merged_at": extract_datetime_or_none(pr.get("mergedAt")),
+            "duration_in_hours": format_timedelta_to_hours(
+                get_time_without_weekend(
+                    arrow.get(pr["createdAt"]),
+                    arrow.get(pr["mergedAt"])
+                )
+            )
+            if extract_datetime_or_none(pr.get("createdAt")) and extract_datetime_or_none(pr.get("mergedAt"))
             else None,
         }
         for pr in pr_list
@@ -63,6 +70,9 @@ def get_open_to_merge_time_data(
         "mean": mean,
         "median": median,
         "percentile_95": percentile,
+        "mean_duration_in_hours": mean.total_seconds() / 3600,
+        "median_duration_in_hours": median.total_seconds() / 3600,
+        "percentile_95_duration_in_hours": percentile.total_seconds() / 3600,
         "total_prs": merged_pr_list,
         "merged_pr_rate": merged_pr_rate,
     }
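The new duration_in_hours field leans on get_time_without_weekend and format_timedelta_to_hours from github_metrics.helpers, whose implementations are not part of this diff. A simplified sketch of that calculation, assuming the helpers exclude weekend days and convert a timedelta to hours (not the project's actual code):

from datetime import timedelta

import arrow

def get_time_without_weekend(start, end):
    # Raw span between the two timestamps, minus full weekend days in between.
    delta = end - start
    weekend_days = sum(
        1
        for day in arrow.Arrow.range("day", start.floor("day"), end.floor("day"))
        if day.weekday() >= 5  # 5 = Saturday, 6 = Sunday
    )
    return delta - timedelta(days=weekend_days)

def format_timedelta_to_hours(delta):
    # Express a timedelta as a float number of hours, e.g. 5400s -> 1.5.
    return round(delta.total_seconds() / 3600, 2)

# e.g. a PR opened Friday 16:00 and merged Monday 10:00 counts roughly 18h, not 66h.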
2 changes: 2 additions & 0 deletions github_metrics/metrics/pr_size.py
@@ -1,3 +1,4 @@
+from github_metrics.common import get_author_login
 import numpy
 
 from github_metrics.helpers import filter_valid_prs
@@ -8,6 +9,7 @@ def format_pr_list(pr_list):
         {
             "additions": pr["additions"],
             "deletions": pr["deletions"],
+            "author": get_author_login(pr),
         }
         for pr in pr_list
     ]
4 changes: 3 additions & 1 deletion github_metrics/metrics/time_to_merge.py
@@ -80,11 +80,13 @@ def get_time_to_merge_data(
     mean = numpy.mean(time_to_merge_list)
     median = numpy.median(time_to_merge_list)
     percentile = numpy.percentile(time_to_merge_list, 95)
-
     return {
         "mean": mean,
         "median": median,
         "percentile_95": percentile,
+        "mean_duration_in_hours": mean.total_seconds() / 3600,
+        "median_duration_in_hours": median.total_seconds() / 3600,
+        "percentile_95_duration_in_hours": percentile.total_seconds() / 3600,
         "merged_prs": merged_prs,
     }
 
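The *_duration_in_hours keys added here and in the other metrics files assume that mean, median and percentile behave like datetime.timedelta values, i.e. expose total_seconds(); dividing by 3600 then yields hours:

from datetime import timedelta

mean = timedelta(hours=5, minutes=30)
print(mean.total_seconds() / 3600)  # 5.5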
3 changes: 3 additions & 0 deletions github_metrics/metrics/time_to_open.py
@@ -70,6 +70,9 @@ def get_time_to_open_data(
         "mean": mean,
         "median": median,
         "percentile_95": percentile,
+        "mean_duration_in_hours": mean.total_seconds() / 3600,
+        "median_duration_in_hours": median.total_seconds() / 3600,
+        "percentile_95_duration_in_hours": percentile.total_seconds() / 3600,
         "total_prs": formatted_pr_list,
     }
 
68 changes: 68 additions & 0 deletions github_metrics/metrics/time_to_review.py
@@ -22,6 +22,16 @@ def get_reviews_from_pr(pr):
 
     return reviews
 
+def get_comments_from_pr(pr):
+    comments_root = pr.get("comments")
+
+    if not comments_root:
+        return []
+
+    comments = comments_root.get("nodes", [])
+    return comments
+
+
 
 def get_first_review(pr):
     pr_author_login = get_author_login(pr)
@@ -36,6 +46,53 @@ def get_first_review(pr):
 
     return different_author_reviews[0]
 
+def get_comments_from_pr_review(pr):
+    reviews_root = pr.get("reviews")
+    if not reviews_root:
+        return []
+
+    nodes = reviews_root.get("nodes", [])
+    edges = reviews_root.get("edges", [])
+    edge_comments = [
+        {"login": get_author_login(e["node"]), "comment": e["node"]["body"]}
+        for e in edges if e.get("node", {}).get("body")
+    ]
+    node_comments = [
+        {"login": get_author_login(c), "comment": c["body"]}
+        for n in nodes
+        for c in n.get("comments", {"nodes": []}).get("nodes") if c.get("body")
+    ]
+    return node_comments + edge_comments
+
+
+def get_reviewers_and_comments(pr):
+    pr_author_login = get_author_login(pr)
+    comments = get_comments_from_pr(pr)
+    comments_from_pr_review = get_comments_from_pr_review(pr)
+
+    different_author_reviews = [
+        r["login"] for r in comments_from_pr_review if pr_author_login != r["login"]
+    ]
+    different_author_comments = [
+        get_author_login(r) for r in comments if pr_author_login != get_author_login(r)
+    ]
+
+    reviewers = different_author_comments + different_author_reviews
+    reviewers = list(set(reviewers))
+    if not reviewers:
+        return
+
+    reviewers_and_comments = []
+    for reviewer in reviewers:
+        reviewer_comments = [c.get("body") for c in comments if get_author_login(c) == reviewer]
+        reviewer_pr_reviews = [i["comment"] for i in comments_from_pr_review if i["login"] == reviewer]
+
+        reviewers_and_comments.append({
+            "login": reviewer,
+            "comments": reviewer_pr_reviews + reviewer_comments,
+        })
+    return reviewers_and_comments
+
 
 def hours_without_review(pr):
     open_date = extract_datetime_or_none(pr["created_at"])
@@ -63,6 +120,14 @@ def format_pr_list(pr_list):
             )
             if get_first_review(pr)
             else None,
+            "reviewers": get_reviewers_and_comments(pr),
+            "duration_in_hours": format_timedelta_to_hours(
+                get_time_without_weekend(
+                    arrow.get(pr["createdAt"]),
+                    arrow.get(get_first_review(pr).get("createdAt"))
+                )
+            ) if get_first_review(pr) and pr["createdAt"]
+            else None
         }
         for pr in pr_list
     ]
@@ -106,6 +171,9 @@ def get_time_to_review_data(
         "mean": mean,
         "median": median,
         "percentile_95": percentile,
+        "mean_duration_in_hours": mean.total_seconds() / 3600,
+        "median_duration_in_hours": median.total_seconds() / 3600,
+        "percentile_95_duration_in_hours": percentile.total_seconds() / 3600,
         "total_prs": formatted_pr_list,
         "unreviewed_prs": unreviewed_prs,
         "prs_over_24h": prs_over_24h,
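For reference, the new reviewers entry is whatever get_reviewers_and_comments() returns: a list with one dict per person other than the PR author who commented or reviewed, or None when there is no such person. A hypothetical example of the shape (logins and comment bodies are made up):

reviewers = [
    {
        "login": "reviewer-a",  # hypothetical login
        "comments": [
            "Looks good overall, one nit on naming.",
            "Can we add a test for the weekend case?",
        ],
    },
    {
        "login": "reviewer-b",
        "comments": ["LGTM"],
    },
]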
37 changes: 36 additions & 1 deletion github_metrics/request.py
@@ -1,3 +1,4 @@
+import random
 from time import sleep
 
 import requests
@@ -44,14 +45,46 @@ def format_request_for_github(cursor=None):
           headRefName
           additions
           deletions
+          comments(first: 100) {{
+            nodes {{
+              author {{
+                login
+              }}
+              body
+            }}
+            edges {{
+              node {{
+                author {{
+                  login
+                }}
+                body
+              }}
+            }}
+          }}
           reviews(first: 10) {{
             nodes {{
               createdAt
               state
+              comments(first: 100) {{
+                nodes {{
+                  body
+                  author {{
+                    login
+                  }}
+                }}
+              }}
               author {{
                 login
               }}
             }}
+            edges {{
+              node {{
+                body
+                author {{
+                  login
+                }}
+              }}
+            }}
           }}
           author {{
             login
@@ -93,8 +126,10 @@ def fetch_prs_between(start_date, end_date):
     current_date = None
     cursor = None
     has_next_page = True
-
+    timeout = [25, 15, 5, 30]
     while has_next_page and (not current_date or current_date > start_date):
+        i = random.choice(timeout)
+        sleep(i)
         response = requests.post(
             "https://api.github.com/graphql",
             auth=HTTPBasicAuth(GITHUB_LOGIN, GITHUB_TOKEN),
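The throttling added to fetch_prs_between() sleeps for one of the listed delays, chosen at random, before every GraphQL page request, which spreads the calls out. The pattern in isolation (the loop below is a stand-in, not the real pagination code):

import random
from time import sleep

timeout = [25, 15, 5, 30]
for page in range(3):  # stand-in for the has_next_page loop
    delay = random.choice(timeout)
    sleep(delay)  # wait 5-30 seconds before the next request
    print(f"fetching page {page} after {delay}s")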