From 24e7984b68b59c10b782aa3f8d3334e1bbad266d Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 29 Apr 2025 19:45:32 -0500 Subject: [PATCH 1/3] improve ratelimit logging --- augur/tasks/github/util/github_data_access.py | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/augur/tasks/github/util/github_data_access.py b/augur/tasks/github/util/github_data_access.py index 0eda1c8462..223f8d0bc7 100644 --- a/augur/tasks/github/util/github_data_access.py +++ b/augur/tasks/github/util/github_data_access.py @@ -10,15 +10,18 @@ class RatelimitException(Exception): - def __init__(self, response, message="Github Rate limit exceeded") -> None: + def __init__(self, response, keys_used, message="Github Rate limit exceeded") -> None: self.response = response - super().__init__(message) + super().__init__(f"{message}. Keys used: {keys_used}") class UrlNotFoundException(Exception): pass +class NotAuthorizedException(Exception): + pass + class GithubDataAccess: def __init__(self, key_manager, logger: logging.Logger): @@ -26,6 +29,7 @@ def __init__(self, key_manager, logger: logging.Logger): self.logger = logger self.key_client = KeyClient("github_rest", logger) self.key = None + self.expired_keys_for_request = [] def get_resource_count(self, url): @@ -105,7 +109,8 @@ def make_request(self, url, method="GET", timeout=100): response = client.request(method=method, url=url, headers=headers, timeout=timeout, follow_redirects=True) if response.status_code in [403, 429]: - raise RatelimitException(response) + self.expired_keys_for_request.append(self.key) + raise RatelimitException(response, self.expired_keys_for_request[-5:]) if response.status_code == 404: raise UrlNotFoundException(f"Could not find {url}") @@ -114,7 +119,8 @@ def make_request(self, url, method="GET", timeout=100): try: if "X-RateLimit-Remaining" in response.headers and int(response.headers["X-RateLimit-Remaining"]) < GITHUB_RATELIMIT_REMAINING_CAP: - raise RatelimitException(response) + self.expired_keys_for_request.append(self.key) + raise RatelimitException(response, self.expired_keys_for_request[-5:]) except ValueError: self.logger.warning(f"X-RateLimit-Remaining was not an integer. Value: {response.headers['X-RateLimit-Remaining']}") @@ -141,14 +147,26 @@ def __make_request_with_retries(self, url, method="GET", timeout=100): """ try: - return self.make_request(url, method, timeout) + result = self.make_request(url, method, timeout) + self.expired_keys_for_request = [] + return result except RatelimitException as e: self.__handle_github_ratelimit_response(e.response) raise e + except NotAuthorizedException as e: + self.expired_keys_for_request = [] + self.__handle_github_not_authorized_response() + raise e + + def __handle_github_not_authorized_response(self): + + self.key = self.key_client.invalidate(self.key) + def __handle_github_ratelimit_response(self, response): headers = response.headers + previous_key = self.key if "Retry-After" in headers: @@ -172,6 +190,9 @@ def __handle_github_ratelimit_response(self, response): else: time.sleep(60) + if previous_key == self.key: + self.logger.error(f"The same key was returned after a request to expire it was sent (key: {self.key[-5:]})") + def __add_query_params(self, url: str, additional_params: dict) -> str: """Add query params to a url. From bc148425e847f7b82deb51a85724033d1ccd92a7 Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Tue, 29 Apr 2025 18:32:08 -0500 Subject: [PATCH 2/3] Fix facade task failure - Rollback transaction on failure - Automatically substitute tz for UTC when invalid - Do not reset date on invalid tz Signed-off-by: Ulincsys --- augur/application/db/lib.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index 4d10b90117..d586fa113a 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -1,3 +1,4 @@ +import re import time import random import logging @@ -243,6 +244,7 @@ def facade_bulk_insert_commits(logger, records): ) session.commit() except Exception as e: + session.rollback() if len(records) > 1: logger.error(f"Ran into issue when trying to insert commits \n Error: {e}") @@ -257,7 +259,14 @@ def facade_bulk_insert_commits(logger, records): commit_record = records[0] #replace incomprehensible dates with epoch. #2021-10-11 11:57:46 -0500 - placeholder_date = "1970-01-01 00:00:15 -0500" + + # placeholder_date = "1970-01-01 00:00:15 -0500" + placeholder_date = commit_record['author_timestamp'] + + # Reconstruct timezone portion of the date string to UTC + placeholder_date = re.split("[-+]", placeholder_date) + placeholder_date.pop() + placeholder_date = "-".join(placeholder_date) + "+0000" #Check for improper utc timezone offset #UTC timezone offset should be between -14:00 and +14:00 From d55dfe5dc1efda9a9acca3c396a5929dd29b5512 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Wed, 30 Apr 2025 10:53:02 -0500 Subject: [PATCH 3/3] release update Signed-off-by: Sean P. Goggins --- README.md | 4 ++-- docker/backend/Dockerfile | 2 +- docker/database/Dockerfile | 2 +- docker/rabbitmq/Dockerfile | 2 +- metadata.py | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 33e41bcf15..ff7480b702 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Augur NEW Release v0.86.0 +# Augur NEW Release v0.86.1 Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data - less data carpentry for everyone else! The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot), a public instance of 8Knot is available [here](https://metrix.chaoss.io) - this is tied to a public instance of [Augur](https://ai.chaoss.io). @@ -11,7 +11,7 @@ We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy o ## NEW RELEASE ALERT! **If you want to jump right in, the updated docker, docker-compose and bare metal installation instructions are available [here](docs/new-install.md)**. -Augur is now releasing a dramatically improved new version to the ```main``` branch. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.86.0). +Augur is now releasing a dramatically improved new version to the ```main``` branch. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.86.1). - The `main` branch is a stable version of our new architecture, which features: diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 959d3952f5..8cd663a0a9 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -32,7 +32,7 @@ RUN ./scripts/docker/install-golang-deps.sh FROM python:3.11-slim-bullseye LABEL maintainer="outdoors@acm.org" -LABEL version="0.86.0" +LABEL version="0.86.1" ENV DEBIAN_FRONTEND=noninteractive ENV PATH="/usr/bin/:/usr/local/bin:/usr/lib:${PATH}" diff --git a/docker/database/Dockerfile b/docker/database/Dockerfile index e5825ca7bf..9d677fd2be 100644 --- a/docker/database/Dockerfile +++ b/docker/database/Dockerfile @@ -2,7 +2,7 @@ FROM postgres:16 LABEL maintainer="outdoors@acm.org" -LABEL version="0.86.0" +LABEL version="0.86.1" ENV POSTGRES_DB "test" ENV POSTGRES_USER "augur" diff --git a/docker/rabbitmq/Dockerfile b/docker/rabbitmq/Dockerfile index 9b0b3c98c3..6cf5251f7b 100644 --- a/docker/rabbitmq/Dockerfile +++ b/docker/rabbitmq/Dockerfile @@ -1,7 +1,7 @@ FROM rabbitmq:3.12-management-alpine LABEL maintainer="574/augur@simplelogin.com" -LABEL version="0.86.0" +LABEL version="0.86.1" ARG RABBIT_MQ_DEFAULT_USER=augur ARG RABBIT_MQ_DEFAULT_PASSWORD=password123 diff --git a/metadata.py b/metadata.py index 740db52f3d..d94bb3822f 100644 --- a/metadata.py +++ b/metadata.py @@ -5,8 +5,8 @@ __short_description__ = "Python 3 package for free/libre and open-source software community metrics, models & data collection" -__version__ = "0.86.0" -__release__ = "v0.86.0 (Pod People)" +__version__ = "0.86.1" +__release__ = "v0.86.1 (Pod People)" __license__ = "MIT" __copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Sean Goggins, Brian Warner & Augurlabs 2025"