Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions augur/tasks/github/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ def collect_events(repo_git: str, full_collection: bool):

def bulk_events_collection_endpoint_contains_all_data(key_auth, logger, owner, repo):

url = f"https://api.github.com/repos/{owner}/{repo}/issues/events?per_page=100"

github_data_access = GithubDataAccess(key_auth, logger)

url = github_data_access.issues_endpoint_url(owner, repo) + "events?per_page=100"

page_count = github_data_access.get_resource_page_count(url)

if page_count > 300:
Expand Down Expand Up @@ -133,11 +133,11 @@ def collect(self, repo_git, key_auth, since):
def _collect_events(self, repo_git: str, key_auth, since):

owner, repo = get_owner_repo(repo_git)

url = f"https://api.github.com/repos/{owner}/{repo}/issues/events"

github_data_access = GithubDataAccess(key_auth, self._logger)

url = github_data_access.issues_endpoint_url(owner, repo) + "events"

for event in github_data_access.paginate_resource(url):

yield event
Expand Down Expand Up @@ -314,7 +314,7 @@ def _collect_and_process_issue_events(self, owner, repo, repo_id, key_auth, sinc

issue_number = issue["issue_number"]

event_url = f"https://api.github.com/repos/{owner}/{repo}/issues/{issue_number}/events"
event_url = github_data_access.issues_endpoint_url(owner, repo) + f"{issue_number}/events"

try:

Expand Down Expand Up @@ -377,7 +377,7 @@ def _collect_and_process_pr_events(self, owner, repo, repo_id, key_auth, since):

pr_number = pr["gh_pr_number"]

event_url = f"https://api.github.com/repos/{owner}/{repo}/issues/{pr_number}/events"
event_url = github_data_access.issues_endpoint_url(owner, repo) + f"{pr_number}/events"

try:

Expand Down
9 changes: 3 additions & 6 deletions augur/tasks/github/facade_github/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,6 @@ def query_github_contributors(logger, key_auth, github_url, tool_source:str, too
logger.error(f"Encountered bad url: {github_url}")
raise e

# Set the base of the url and place to hold contributors to insert
contributors_url = (
f"https://api.github.com/repos/{owner}/{name}/" +
"contributors?state=all"
)

# Get contributors that we already have stored
# Set our duplicate and update column map keys (something other than PK) to
# check duplicates/needed column updates with
Expand All @@ -42,6 +36,9 @@ def query_github_contributors(logger, key_auth, github_url, tool_source:str, too

github_data_access = GithubDataAccess(key_auth, logger)

# Set the base of the url and place to hold contributors to insert
contributors_url = github_data_access.contributors_endpoint_url(owner, repo) + "?state=all"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
E0602: Undefined variable 'repo' (undefined-variable)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this commit shouldnt be here....


contributor_count = github_data_access.get_resource_count(contributors_url)

logger.info("Count of contributors needing insertion: " + str(contributor_count) + "\n")
Expand Down
6 changes: 2 additions & 4 deletions augur/tasks/github/facade_github/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,9 @@ def process_commit_metadata(logger, auth, contributorQueue, repo_id, platform_id
f"Could not create new unresolved email {email}. Error: {e}")
# move on to the next contributor
continue

url = ("https://api.github.com/users/" + login)


try:
user_data = github_data_access.get_resource(url)
user_data = github_data_access.get_user(login)
except UrlNotFoundException as e:
logger.warning(f"User of {login} not found on github. Skipping...")
continue
Expand Down
8 changes: 4 additions & 4 deletions augur/tasks/github/issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ def retrieve_all_issue_data(repo_git: str, logger: logging.Logger, key_auth: Git

logger.info(f"Collecting issues for {owner}/{repo}")

url = f"https://api.github.com/repos/{owner}/{repo}/issues?state=all"
github_data_access = GithubDataAccess(key_auth, logger)

url = github_data_access.issues_endpoint_url(owner, repo, trailing_slash=False) + "?state=all"

if since:
url += f"&since={since.isoformat()}"

github_data_access = GithubDataAccess(key_auth, logger)


num_pages = github_data_access.get_resource_page_count(url)
logger.info(f"{owner}/{repo}: Retrieving {num_pages} pages of issues")

Expand Down
6 changes: 3 additions & 3 deletions augur/tasks/github/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,17 @@ def fast_retrieve_all_pr_and_issue_messages(repo_git: str, logger, key_auth, tas

owner, repo = get_owner_repo(repo_git)

github_data_access = GithubDataAccess(key_auth, logger)

# url to get issue and pull request comments
url = f"https://api.github.com/repos/{owner}/{repo}/issues/comments"
url = github_data_access.issues_endpoint_url(owner, repo) + "comments"

if since:
url += f"?since={since.isoformat()}"

# define logger for task
logger.info(f"Collecting github comments for {owner}/{repo}")

github_data_access = GithubDataAccess(key_auth, logger)

message_count = github_data_access.get_resource_count(url)

logger.info(f"{task_name}: Collecting {message_count} github messages")
Expand Down
119 changes: 119 additions & 0 deletions augur/tasks/github/util/github_data_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from urllib.parse import urlparse, parse_qs, urlencode
from keyman.KeyClient import KeyClient
from augur.util.keys import mask_key
import urllib.parse

GITHUB_RATELIMIT_REMAINING_CAP = 50

Expand Down Expand Up @@ -35,6 +36,114 @@ def __init__(self, message="Resource returned HTTP 410 Gone. It is likely intent
super().__init__(message)

class GithubDataAccess:
"""Utilities for accessing the GitHub REST API

Public facing functions in this class should refrain from returning data in a structure
that is derived from githubs API responses to keep all platform-specific parsing here.
"""

def _base_domain(self) -> str:
"""the base domain against which api calls are assembled

Returns:
str: the base domain by itself
"""
return "github.com"

def _base_url(self) -> str:
"""the github base URL with HTTP scheme and trailing slash, suitable for building specific API urls.

Returns:
str: the base url
"""
return f"https://api.{self._base_domain()}/"

def issues_endpoint_url(self, owner:str, repo:str, trailing_slash = True) -> str:
    """Build the REST API URL of a repository's issues endpoint.

    Args:
        owner (str): the owner/org of the repo
        repo (str): the repo name
        trailing_slash (bool, optional): append a final "/" so a sub-path can be
            concatenated directly; pass False when a query string follows. Defaults to True.

    Returns:
        str: the assembled URL. Example: https://api.github.com/repos/owner/repo/issues/
    """
    endpoint = "{}repos/{}/{}/issues".format(self._base_url(), owner, repo)
    if trailing_slash:
        endpoint += "/"
    return endpoint

def contributors_endpoint_url(self, owner:str, repo:str, trailing_slash = True) -> str:
    """Build the REST API URL of a repository's contributors endpoint.

    Args:
        owner (str): the owner/org of the repo
        repo (str): the repo name
        trailing_slash (bool, optional): append a final "/" to the URL. Defaults to True.

    Returns:
        str: the assembled URL. Example: https://api.github.com/repos/owner/repo/contributors/
    """
    suffix = "/" if trailing_slash else ""
    root = self._base_url()
    return "%srepos/%s/%s/contributors%s" % (root, owner, repo, suffix)

def user_endpoint_url(self, username:str, trailing_slash = True) -> str:
    """Build the REST API URL of a single user's endpoint.

    Args:
        username (str): the github username to query
        trailing_slash (bool, optional): append a final "/" to the URL. Defaults to True.

    Returns:
        str: the assembled URL. Example: https://api.github.com/users/username/
    """
    endpoint = "{}users/{}".format(self._base_url(), username)
    if trailing_slash:
        return endpoint + "/"
    return endpoint

def user_endpoint_urls(self, username:str) -> dict:
    """Build the set of URLs that sit beneath a user's endpoint, in dict form.

    Intended to enable the recreation of a subset of what is returned by the
    github API for a user, without making a request.

    Args:
        username (str): the github username to query

    Returns:
        dict: maps each ``*_url`` key to its URL for this user. Several values
            are URI templates and keep their literal ``{/placeholder}`` text.
    """
    # No trailing slash: every sub-path below supplies its own leading "/".
    user_url = self.user_endpoint_url(username, trailing_slash=False)
    return {
        "url": user_url,
        # The profile page lives on the web domain, not the API host. Derive it
        # from _base_domain() so the domain stays defined in exactly one place.
        "html_url": f"https://{self._base_domain()}/{username}",
        "followers_url": f"{user_url}/followers",
        # Plain concatenation (not f-strings) where the value contains literal
        # braces that must be emitted verbatim as URI-template placeholders.
        "following_url": user_url + "/following{/other_user}",
        "gists_url": user_url + "/gists{/gist_id}",
        "starred_url": user_url + "/starred{/owner}{/repo}",
        "subscriptions_url": f"{user_url}/subscriptions",
        "organizations_url": f"{user_url}/orgs",
        "repos_url": f"{user_url}/repos",
        "events_url": user_url + "/events{/privacy}",
        "received_events_url": f"{user_url}/received_events",
    }


def search_endpoint(self, topic: str, query: str) -> str:
    """Construct the github API URL that performs a search.

    Args:
        topic (str): what to search; matched case-insensitively. Valid options are:
            users, code, commits, issues, labels, repositories, topics.
        query (str): the query as you would type it into github's search bar.
            Example: "email@example.com in:email type:user"

    Raises:
        ValueError: if an invalid topic is provided

    Returns:
        str: a URL that can be queried to perform the search
    """
    valid_topics = ("users", "code", "commits", "issues", "labels", "repositories", "topics")

    topic = topic.lower()
    if topic in valid_topics:
        root = self._base_url()
        # Percent-encode the query so spaces and qualifiers survive in the URL.
        encoded_query = urllib.parse.quote(query)
        return f"{root}search/{topic}?q={encoded_query}"

    raise ValueError(f"Invalid topic '{topic}' provided for searching github.")


def __init__(self, key_manager, logger: logging.Logger, feature="rest"):

Expand All @@ -44,6 +153,16 @@ def __init__(self, key_manager, logger: logging.Logger, feature="rest"):
self.key = None
self.expired_keys_for_request = []


def get_user(self, username:str):
    """Fetch a single user's record from the users endpoint.

    Args:
        username (str): the github username to look up

    Returns:
        whatever ``get_resource`` returns for the user URL; any exception it
        raises propagates unchanged.
    """
    # Request the canonical ".../users/<username>" form. The helper's default
    # would append a trailing slash, which is not the URL this lookup used
    # before the URL helpers were introduced.
    url = self.user_endpoint_url(username, trailing_slash=False)

    return self.get_resource(url)

def perform_search(self, topic: str, query: str):
    """Run a search by fetching the URL built by ``search_endpoint``.

    Args:
        topic (str): the search topic, passed through to ``search_endpoint``
        query (str): the search query, passed through to ``search_endpoint``

    Returns:
        whatever ``get_resource`` returns for the search URL.
    """
    return self.get_resource(self.search_endpoint(topic, query))

def get_resource_count(self, url):

# set per_page to 100 explicitly so we know each page is 100 long
Expand Down
Loading