From c3b08cb870286c5ace0ffb94fd4d4daa13bd1f19 Mon Sep 17 00:00:00 2001 From: mohsinm-dev Date: Sun, 27 Jul 2025 19:04:50 +0500 Subject: [PATCH 001/105] fix: resolve UniqueViolation error in GitHub releases collection Fix for GitHub Issue #3194 where releases collection was failing with psycopg2.errors.UniqueViolation on releases_pkey constraint. Root cause: GitHub API returns release IDs with trailing spaces that don't match existing trimmed database records. Changes: - Add str().strip() to release_id processing in get_release_inf() - Enhance duplicate detection in insert_release() with proper trimming - Add early duplicate detection to prevent unnecessary database operations Signed-off-by: mohsinm-dev --- augur/tasks/github/releases/core.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/augur/tasks/github/releases/core.py b/augur/tasks/github/releases/core.py index 239b83dce9..255b34cf89 100644 --- a/augur/tasks/github/releases/core.py +++ b/augur/tasks/github/releases/core.py @@ -23,7 +23,7 @@ def get_release_inf(repo_id, release, tag_only): release_inf = { - 'release_id': release['id'], + 'release_id': str(release['id']).strip(), 'repo_id': repo_id, 'release_name': release['name'], 'release_description': release['description'] if release['description'] is not None else '', @@ -51,7 +51,7 @@ def get_release_inf(repo_id, release, tag_only): author = "nobody" date = "" release_inf = { - 'release_id': release['id'], + 'release_id': str(release['id']).strip(), 'repo_id': repo_id, 'release_name': release['name'], 'release_description': 'tag_only', @@ -67,17 +67,23 @@ def get_release_inf(repo_id, release, tag_only): def insert_release(session, logger, repo_id, owner, release, tag_only = False): - # Get current table values + # Get current table values with proper trimming logger.info('Getting release table values\n') query = session.query(Release.release_id).filter(Release.repo_id == repo_id) - release_id_data = 
execute_session_query(query, 'all')#pd.read_sql(release_id_data_sql, self.db, params={'repo_id': repo_id}) - release_id_data = [str(r_id).strip() for r_id in release_id_data]#release_id_data.apply(lambda x: x.str.strip()) + release_id_data = execute_session_query(query, 'all') + existing_release_ids = {str(r_id).strip() for r_id in release_id_data} # Put all data together in format of the table logger.info(f'Inserting release for repo with id:{repo_id}, owner:{owner}, release name:{release["name"]}\n') release_inf = get_release_inf(repo_id, release, tag_only) + + # Check if release already exists (with proper trimming) + new_release_id = str(release_inf['release_id']).strip() + if new_release_id in existing_release_ids: + logger.info(f"Release {new_release_id} already exists for repo {repo_id}, skipping insertion\n") + return - #Do an upsert + #Do an upsert with string field cleaning string_fields = ["release_name", "release_description", "release_author", "release_tag_name"] bulk_insert_dicts(logger, release_inf,Release,['release_id'], string_fields=string_fields) From 8539825bb217c388735dfa1bc43d25dc4cee0d51 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Wed, 27 Aug 2025 10:26:01 -0500 Subject: [PATCH 002/105] add date filter to contributor resolution logic queries Signed-off-by: Isaac Milarsky --- augur/tasks/git/facade_tasks.py | 13 ++----------- .../git/util/facade_worker/facade_worker/config.py | 11 +++++++++++ .../facade_worker/facade_worker/utilitymethods.py | 4 ++-- augur/tasks/github/facade_github/tasks.py | 13 ++++++++++--- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index ce03524e0f..f087c9272a 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -112,8 +112,6 @@ def trim_commits_post_analysis_facade_task(repo_git): repo = repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id - start_date = 
facade_helper.get_setting('start_date') - logger.info(f"Generating sequence for repo {repo_id}") repo = get_repo_by_repo_git(repo_git) @@ -123,7 +121,7 @@ def trim_commits_post_analysis_facade_task(repo_git): repo_loc = (f"{absolute_path}/.git") # Grab the parents of HEAD - parent_commits = get_parent_commits_set(repo_loc, start_date) + parent_commits = get_parent_commits_set(repo_loc) # Grab the existing commits from the database existing_commits = get_existing_commits_set(repo_id) @@ -237,7 +235,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id - start_date = facade_helper.get_setting('start_date') + start_date = facade_helper.get_last_collected_commit_date(repo_id)#.get_setting('start_date') logger.info(f"Generating sequence for repo {repo_id}") @@ -438,11 +436,6 @@ def generate_analysis_sequence(logger,repo_git, facade_helper): analysis_sequence = [] - #repo_list = s.sql.text("""SELECT repo_id,repo_group_id,repo_path,repo_name FROM repo WHERE repo_git=:value""").bindparams(value=repo_git) - #repos = fetchall_data_from_sql_text(repo_list) - - start_date = facade_helper.get_setting('start_date') - #repo_ids = [repo['repo_id'] for repo in repos] #repo_id = repo_ids.pop(0) @@ -473,8 +466,6 @@ def facade_phase(repo_git, full_collection): #repo_list = s.sql.text("""SELECT repo_id,repo_group_id,repo_path,repo_name FROM repo WHERE repo_git=:value""").bindparams(value=repo_git) #repos = fetchall_data_from_sql_text(repo_list) - start_date = facade_helper.get_setting('start_date') - #repo_ids = [repo['repo_id'] for repo in repos] #repo_id = repo_ids.pop(0) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index c62034a94e..b65ff7bb69 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -244,6 +244,17 @@ def 
insert_or_update_data(self, query, **bind_args)-> None: return def inc_repos_processed(self): self.repos_processed += 1 + + def get_last_collected_commit_date(self,repo_id): + commit_date_query = s.sql.text(""" + SELECT cmt_committer_timestamp FROM commits + WHERE repo_id=:repo_id + ORDER BY data_collection_date DESC + LIMIT 1; + """).bindparams(repo_id=repo_id) + + result = execute_sql(commit_date_query).fetchone() + return result[0] """ class FacadeConfig: diff --git a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py index caae6c02ba..c06614ac7d 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py @@ -105,10 +105,10 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name): return f"{repo_base_dir}{repo_id}-{repo_path}/{repo_name}" -def get_parent_commits_set(absolute_repo_path, start_date): +def get_parent_commits_set(absolute_repo_path): parents = subprocess.Popen(["git --git-dir %s log --ignore-missing " - "--pretty=format:'%%H' --since=%s" % (absolute_repo_path,start_date)], + "--pretty=format:'%%H'" % (absolute_repo_path)], stdout=subprocess.PIPE, shell=True) parent_commits = set(parents.stdout.read().decode("utf-8",errors="ignore").split(os.linesep)) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 1b11f98223..26a01f21ae 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -198,6 +198,10 @@ def insert_facade_contributors(self, repo_git): logger = logging.getLogger(insert_facade_contributors.__name__) repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id + facade_helper = FacadeHelper(logger) + + collection_status = repo.collection_status[0] + last_collected_date = collection_status.facade_data_last_collected # Get all of the commit data's 
emails and names from the commit table that do not appear # in the contributors table or the contributors_aliases table. @@ -214,6 +218,7 @@ def insert_facade_contributors(self, repo_git): commits WHERE commits.repo_id = :repo_id + AND (:since_date is NULL OR commits.data_collection_date > :since_date) AND (NOT EXISTS ( SELECT contributors.cntrb_canonical FROM contributors WHERE contributors.cntrb_canonical = commits.cmt_author_raw_email ) or NOT EXISTS ( SELECT contributors_aliases.alias_email from contributors_aliases where contributors_aliases.alias_email = commits.cmt_author_raw_email) AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name )) @@ -231,6 +236,7 @@ def insert_facade_contributors(self, repo_git): commits WHERE commits.repo_id = :repo_id + AND (:since_date is NULL OR commits.data_collection_date > :since_date) AND EXISTS ( SELECT unresolved_commit_emails.email FROM unresolved_commit_emails WHERE unresolved_commit_emails.email = commits.cmt_author_raw_email ) AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name ) GROUP BY @@ -239,7 +245,7 @@ def insert_facade_contributors(self, repo_git): commits.cmt_author_raw_email ORDER BY hash - """).bindparams(repo_id=repo_id) + """).bindparams(repo_id=repo_id,since_date=last_collected_date) #Execute statement with session. result = execute_sql(new_contrib_sql) @@ -257,7 +263,6 @@ def insert_facade_contributors(self, repo_git): logger.debug("DEBUG: Got through the new_contribs") - facade_helper = FacadeHelper(logger) # sql query used to find corresponding cntrb_id's of emails found in the contributor's table # i.e., if a contributor already exists, we use it! 
resolve_email_to_cntrb_id_sql = s.sql.text(""" @@ -271,6 +276,7 @@ def insert_facade_contributors(self, repo_git): commits WHERE contributors.cntrb_canonical = commits.cmt_author_raw_email + AND (:since_date is NULL OR commits.data_collection_date > :since_date) AND commits.repo_id = :repo_id UNION SELECT DISTINCT @@ -286,7 +292,8 @@ def insert_facade_contributors(self, repo_git): contributors_aliases.alias_email = commits.cmt_author_raw_email AND contributors.cntrb_id = contributors_aliases.cntrb_id AND commits.repo_id = :repo_id - """).bindparams(repo_id=repo_id) + AND (:since_date is NULL OR commits.data_collection_date > :since_date) + """).bindparams(repo_id=repo_id,since_date=last_collected_date) result = execute_sql(resolve_email_to_cntrb_id_sql) From fe85b3f5a27298eb807d42e0711bf5a51c045c3a Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Wed, 27 Aug 2025 10:28:28 -0500 Subject: [PATCH 003/105] dont use start date Signed-off-by: Isaac Milarsky --- augur/tasks/git/facade_tasks.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index f087c9272a..c049b4b831 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -235,8 +235,6 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id - start_date = facade_helper.get_last_collected_commit_date(repo_id)#.get_setting('start_date') - logger.info(f"Generating sequence for repo {repo_id}") repo = get_repo_by_repo_id(repo_id) @@ -246,7 +244,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: repo_loc = (f"{absolute_path}/.git") # Grab the parents of HEAD - parent_commits = get_parent_commits_set(repo_loc, start_date) + parent_commits = get_parent_commits_set(repo_loc) # Grab the existing commits from the database existing_commits = get_existing_commits_set(repo_id) From 
6a6c76014c64a3a201bddcf4ab41a4a17c1f140a Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 9 Sep 2025 17:30:55 -0500 Subject: [PATCH 004/105] remove unused method Signed-off-by: Isaac Milarsky --- Makefile | 2 +- .../facade_worker/facade_worker/config.py | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 4fe926edc4..485ac13732 100644 --- a/Makefile +++ b/Makefile @@ -129,7 +129,7 @@ test-api: # .PHONY: uv uv: - @ command -v uv >/dev/null 2>&1 || { echo "Installing uv..."; pip install --user uv; } + @ command -v uv >/dev/null 2>&1 || { echo "Installing uv..."; pip3 install uv; } # # Documentation diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index b65ff7bb69..c75329aaff 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -245,16 +245,16 @@ def insert_or_update_data(self, query, **bind_args)-> None: def inc_repos_processed(self): self.repos_processed += 1 - def get_last_collected_commit_date(self,repo_id): - commit_date_query = s.sql.text(""" - SELECT cmt_committer_timestamp FROM commits - WHERE repo_id=:repo_id - ORDER BY data_collection_date DESC - LIMIT 1; - """).bindparams(repo_id=repo_id) - - result = execute_sql(commit_date_query).fetchone() - return result[0] +# def get_last_collected_commit_date(self,repo_id): +# commit_date_query = s.sql.text(""" +# SELECT cmt_committer_timestamp FROM commits +# WHERE repo_id=:repo_id +# ORDER BY data_collection_date DESC +# LIMIT 1; +# """).bindparams(repo_id=repo_id) +# +# result = execute_sql(commit_date_query).fetchone() +# return result[0] """ class FacadeConfig: From b8e1c79072039ecbbdbf43815907a1844b4901c9 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 16 Sep 2025 15:08:02 -0400 Subject: [PATCH 005/105] bump rabbit image to 4.1 Signed-off-by: Adrian Edwards --- 
docker/rabbitmq/Dockerfile | 2 +- docker/rabbitmq/definitions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/rabbitmq/Dockerfile b/docker/rabbitmq/Dockerfile index ad86dfebb7..b4afc5345d 100644 --- a/docker/rabbitmq/Dockerfile +++ b/docker/rabbitmq/Dockerfile @@ -1,4 +1,4 @@ -FROM rabbitmq:3.12-management-alpine +FROM rabbitmq:4.1-management-alpine LABEL maintainer="574/augur@simplelogin.com" LABEL version="0.90.0" diff --git a/docker/rabbitmq/definitions.json b/docker/rabbitmq/definitions.json index 1cd8cc172e..d5fd9faef2 100644 --- a/docker/rabbitmq/definitions.json +++ b/docker/rabbitmq/definitions.json @@ -1,5 +1,5 @@ { - "rabbit_version": "3.12", + "rabbit_version": "4.1", "users": [ { "name": "", From 9ede8af92cf6b23d61d2b963189e3b57531954e7 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Wed, 1 Oct 2025 16:32:27 -0500 Subject: [PATCH 006/105] revert Signed-off-by: Isaac Milarsky --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 485ac13732..4fe926edc4 100644 --- a/Makefile +++ b/Makefile @@ -129,7 +129,7 @@ test-api: # .PHONY: uv uv: - @ command -v uv >/dev/null 2>&1 || { echo "Installing uv..."; pip3 install uv; } + @ command -v uv >/dev/null 2>&1 || { echo "Installing uv..."; pip install --user uv; } # # Documentation From f41e8f812f6966271a3d863ee84c77f3218da242 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Thu, 2 Oct 2025 12:38:06 -0500 Subject: [PATCH 007/105] add toggle to toggle full collection of contributor resolution Signed-off-by: Isaac Milarsky --- .../34_add_facade_config_full_recollect.py | 37 +++++++++++++++++++ .../facade_worker/facade_worker/config.py | 1 + augur/tasks/github/facade_github/tasks.py | 2 +- 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 augur/application/schema/alembic/versions/34_add_facade_config_full_recollect.py diff --git a/augur/application/schema/alembic/versions/34_add_facade_config_full_recollect.py 
b/augur/application/schema/alembic/versions/34_add_facade_config_full_recollect.py new file mode 100644 index 0000000000..b17a75eaa9 --- /dev/null +++ b/augur/application/schema/alembic/versions/34_add_facade_config_full_recollect.py @@ -0,0 +1,37 @@ +"""change config table to add toggle to force facade to recollect through all commits + +Revision ID: 34 +Revises: 33 +Create Date: 2025-10-02 12:45:57.486871 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from sqlalchemy.sql import text + +# revision identifiers, used by Alembic. +revision = '34' +down_revision = '33' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + #Add toggle for facade collection. + conn = op.get_bind() + + conn.execute(text(f""" + INSERT INTO "augur_operations"."config" ("section_name", "setting_name", "value", "type") VALUES ('Facade', 'facade_contributor_full_recollect', '{0}', 'int'); + """)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + conn = op.get_bind() + conn.execute(text(f""" + DELETE FROM "augur_operations"."config" WHERE section_name = 'Facade' AND setting_name = 'facade_contributor_full_recollect' AND type = 'int'; + """)) + # ### end Alembic commands ### diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index c75329aaff..488cdac3f4 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -127,6 +127,7 @@ def __init__(self,logger: Logger): self.rebuild_caches = worker_options["rebuild_caches"] self.multithreaded = worker_options["multithreaded"] self.create_xlsx_summary_files = worker_options["create_xlsx_summary_files"] + self.facade_contributor_full_recollect = worker_options["facade_contributor_full_recollect"] self.tool_source = "Facade" self.data_source = "Git Log" diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 26a01f21ae..3658ef7957 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -201,7 +201,7 @@ def insert_facade_contributors(self, repo_git): facade_helper = FacadeHelper(logger) collection_status = repo.collection_status[0] - last_collected_date = collection_status.facade_data_last_collected + last_collected_date = collection_status.facade_data_last_collected if not facade_helper.facade_contributor_full_recollect else None # Get all of the commit data's emails and names from the commit table that do not appear # in the contributors table or the contributors_aliases table. 
From c2e87ffd10605ae22a295524c95b102f4ba173f9 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Thu, 2 Oct 2025 13:27:29 -0500 Subject: [PATCH 008/105] amend method of getting collection status record Signed-off-by: Isaac Milarsky --- augur/tasks/github/facade_github/tasks.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 3658ef7957..eff64df6ee 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -8,6 +8,7 @@ from augur.application.db.models import Contributor from augur.tasks.github.facade_github.core import * from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors +from augur.application.db.lib import get_session, execute_session_query from augur.tasks.git.util.facade_worker.facade_worker.facade00mainprogram import * @@ -200,8 +201,10 @@ def insert_facade_contributors(self, repo_git): repo_id = repo.repo_id facade_helper = FacadeHelper(logger) - collection_status = repo.collection_status[0] - last_collected_date = collection_status.facade_data_last_collected if not facade_helper.facade_contributor_full_recollect else None + with get_session() as session: + query = session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo.repo_id) + collection_status = execute_session_query(query,'one') + last_collected_date = collection_status.facade_data_last_collected if not facade_helper.facade_contributor_full_recollect else None # Get all of the commit data's emails and names from the commit table that do not appear # in the contributors table or the contributors_aliases table. 
From 8ae5002c6e43588b19b9da37509c45201d326e22 Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Fri, 3 Oct 2025 14:59:36 -0500 Subject: [PATCH 009/105] add config option in proper place Signed-off-by: Isaac Milarsky --- augur/application/config.py | 3 +- .../34_add_facade_config_full_recollect.py | 37 ------------------- 2 files changed, 2 insertions(+), 38 deletions(-) delete mode 100644 augur/application/schema/alembic/versions/34_add_facade_config_full_recollect.py diff --git a/augur/application/config.py b/augur/application/config.py index 7ace2befd3..009adb69f1 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -50,7 +50,8 @@ def get_development_flag(): "pull_repos": 1, "rebuild_caches": 1, "run_analysis": 1, - "run_facade_contributors": 1 + "run_facade_contributors": 1, + "facade_contributor_full_recollect": 1 }, "Server": { "cache_expire": "3600", diff --git a/augur/application/schema/alembic/versions/34_add_facade_config_full_recollect.py b/augur/application/schema/alembic/versions/34_add_facade_config_full_recollect.py deleted file mode 100644 index b17a75eaa9..0000000000 --- a/augur/application/schema/alembic/versions/34_add_facade_config_full_recollect.py +++ /dev/null @@ -1,37 +0,0 @@ -"""change config table to add toggle to force facade to recollect through all commits - -Revision ID: 34 -Revises: 33 -Create Date: 2025-10-02 12:45:57.486871 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql -from sqlalchemy.sql import text - -# revision identifiers, used by Alembic. -revision = '34' -down_revision = '33' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - #Add toggle for facade collection. 
- conn = op.get_bind() - - conn.execute(text(f""" - INSERT INTO "augur_operations"."config" ("section_name", "setting_name", "value", "type") VALUES ('Facade', 'facade_contributor_full_recollect', '{0}', 'int'); - """)) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - conn.execute(text(f""" - DELETE FROM "augur_operations"."config" WHERE section_name = 'Facade' AND setting_name = 'facade_contributor_full_recollect' AND type = 'int'; - """)) - # ### end Alembic commands ### From 4bb65549b89407884c3392591f7efd4c99a6b37c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Mon, 6 Oct 2025 21:31:05 +0100 Subject: [PATCH 010/105] Fix examples of repos CSV files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code which parses the repos files interprets the first column as URL and the second one as repo group ID, but all the examples were doing the opposite. 
Signed-off-by: Mosè Giordano --- augur/application/schema/repo_load_sample.csv | 16 ++++++++-------- .../command-line-interface/db.rst | 16 ++++++++-------- .../test_repos.csv | 16 ++++++++-------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/augur/application/schema/repo_load_sample.csv b/augur/application/schema/repo_load_sample.csv index b04519f30f..fb537d4949 100644 --- a/augur/application/schema/repo_load_sample.csv +++ b/augur/application/schema/repo_load_sample.csv @@ -1,8 +1,8 @@ -10,https://github.com/chaoss/augur.git -10,https://github.com/chaoss/grimoirelab.git -20,https://github.com/chaoss/wg-evolution.git -20,https://github.com/chaoss/wg-risk.git -20,https://github.com/chaoss/wg-common.git -20,https://github.com/chaoss/wg-value.git -20,https://github.com/chaoss/wg-diversity-inclusion.git -20,https://github.com/chaoss/wg-app-ecosystem.git +https://github.com/chaoss/augur.git,10 +https://github.com/chaoss/grimoirelab.git,10 +https://github.com/chaoss/wg-evolution.git,20 +https://github.com/chaoss/wg-risk.git,20 +https://github.com/chaoss/wg-common.git,20 +https://github.com/chaoss/wg-value.git,20 +https://github.com/chaoss/wg-diversity-inclusion.git,20 +https://github.com/chaoss/wg-app-ecosystem.git,20 diff --git a/docs/source/getting-started/command-line-interface/db.rst b/docs/source/getting-started/command-line-interface/db.rst index b754f2e067..a810f1b9d7 100644 --- a/docs/source/getting-started/command-line-interface/db.rst +++ b/docs/source/getting-started/command-line-interface/db.rst @@ -78,14 +78,14 @@ Example usage\: .. 
code-block:: bash # contents of repos.csv - 10,https://github.com/chaoss/augur.git - 10,https://github.com/chaoss/grimoirelab.git - 20,https://github.com/chaoss/wg-evolution.git - 20,https://github.com/chaoss/wg-risk.git - 20,https://github.com/chaoss/wg-common.git - 20,https://github.com/chaoss/wg-value.git - 20,https://github.com/chaoss/wg-diversity-inclusion.git - 20,https://github.com/chaoss/wg-app-ecosystem.git + https://github.com/chaoss/augur.git,10 + https://github.com/chaoss/grimoirelab.git,10 + https://github.com/chaoss/wg-evolution.git,20 + https://github.com/chaoss/wg-risk.git,20 + https://github.com/chaoss/wg-common.git,20 + https://github.com/chaoss/wg-value.git,20 + https://github.com/chaoss/wg-diversity-inclusion.git,20 + https://github.com/chaoss/wg-app-ecosystem.git,20 # to add repos to the database $ augur db add-repos repos.csv diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv index 8967ae2142..fb537d4949 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv @@ -1,8 +1,8 @@ -10,https://github.com/chaoss/augur.git -10,https://github.com/chaoss/grimoirelab.git -20,https://github.com/chaoss/wg-evolution.git -20,https://github.com/chaoss/wg-risk.git -20,https://github.com/chaoss/wg-common.git -20,https://github.com/chaoss/wg-value.git -20,https://github.com/chaoss/wg-diversity-inclusion.git -20,https://github.com/chaoss/wg-app-ecosystem.git \ No newline at end of file +https://github.com/chaoss/augur.git,10 +https://github.com/chaoss/grimoirelab.git,10 +https://github.com/chaoss/wg-evolution.git,20 +https://github.com/chaoss/wg-risk.git,20 +https://github.com/chaoss/wg-common.git,20 +https://github.com/chaoss/wg-value.git,20 +https://github.com/chaoss/wg-diversity-inclusion.git,20 
+https://github.com/chaoss/wg-app-ecosystem.git,20 From 49fe2066ecae2e1fa5e2c734117323cd7fe89476 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= Date: Mon, 6 Oct 2025 21:56:34 +0100 Subject: [PATCH 011/105] docker compose: Make it crystal clear that all the GitHub/GitLab user/keys must be set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The non-interactive Docker compose workflow requires all the variables `AUGUR_GIT{LA,HU}B_{USERNAME,API_KEY}` to be set even if they are going to be unused, otherwise you get stuck waiting for [the prompt](https://github.com/chaoss/augur/blob/b0bb3b80402ee5fcd84bec7334e58a41f9f5ec8a/scripts/install/config.sh#L18-L29) ``` You entered a blank line, are you sure? ``` Signed-off-by: Mosè Giordano --- docs/source/docker/docker-compose.rst | 2 +- docs/source/docker/getting-started.rst | 2 +- docs/source/docker/quick-start.rst | 2 +- docs/source/getting-started/using-docker.rst | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/docker/docker-compose.rst b/docs/source/docker/docker-compose.rst index d96476c392..1bc3a25a6f 100644 --- a/docs/source/docker/docker-compose.rst +++ b/docs/source/docker/docker-compose.rst @@ -26,7 +26,7 @@ This section of the documentation details how to use Augur's Docker Compose conf .. warning:: - Don't forget to provide your external database credentials in a file called ``.env`` file. Make sure the following environment variables are specified. + Don't forget to provide your external database credentials in a file called ``.env`` file. Make sure all the following environment variables are specified, keep placeholder values if you don't need some of them. Don't specify AUGUR_DB if you want the docker database to be used. 
Example .env: diff --git a/docs/source/docker/getting-started.rst b/docs/source/docker/getting-started.rst index e747bbb304..c413d9ed45 100644 --- a/docs/source/docker/getting-started.rst +++ b/docs/source/docker/getting-started.rst @@ -31,7 +31,7 @@ the following resources (or more): - 10 GB RAM Clone the Augur repository and create a .env file in the top level directory -with the following fields: +with the following fields (don't remove any variable, keep placeholder values if you don't need some of them): .. code:: python diff --git a/docs/source/docker/quick-start.rst b/docs/source/docker/quick-start.rst index b7d7b7cc7e..c7530f6ae9 100644 --- a/docs/source/docker/quick-start.rst +++ b/docs/source/docker/quick-start.rst @@ -9,7 +9,7 @@ Before you get off to such a quick start, go ahead and git checkout main - 4. Create a .env file in the top level directory with the following fields: + 4. Create a .env file in the top level directory with the following fields (don't remove any variable, keep placeholder values if you don't need some of them): .. code:: python diff --git a/docs/source/getting-started/using-docker.rst b/docs/source/getting-started/using-docker.rst index cc5e23896c..c1c693eda2 100644 --- a/docs/source/getting-started/using-docker.rst +++ b/docs/source/getting-started/using-docker.rst @@ -10,7 +10,7 @@ the following resources (or more). 1. Clone the Augur repository https://github.com/chaoss/augur -2. Create a .env file in the top level directory with the following fields: +2. Create a ``.env`` file in the top level directory with the following fields (don't remove any variable, keep placeholder values if you don't need some of them): .. code:: python @@ -35,7 +35,7 @@ or podman compose up --build -And augur should be up and running! Over time, you may decide that you want to download and run newer releases of Augur. 
It is critical that your `.env` file remains configured to use the same database name and password; though you can change the password if you understand how to connect to a database running inside a Docker container on your computer. +And augur should be up and running! Over time, you may decide that you want to download and run newer releases of Augur. It is critical that your ``.env`` file remains configured to use the same database name and password; though you can change the password if you understand how to connect to a database running inside a Docker container on your computer. Rebuilding Augur in Docker ---------------------------- From a87c6a0e60fcdd3ee919aa14e4d5f77bf540dd69 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 9 Oct 2025 09:14:52 -0500 Subject: [PATCH 012/105] Change facade_contributor_full_recollect to 0 Set default to 0. Signed-off-by: Sean P. Goggins --- augur/application/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index 009adb69f1..ee3c33dc8b 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -51,7 +51,7 @@ def get_development_flag(): "rebuild_caches": 1, "run_analysis": 1, "run_facade_contributors": 1, - "facade_contributor_full_recollect": 1 + "facade_contributor_full_recollect": 0 }, "Server": { "cache_expire": "3600", From fdc00056fafa8bc25800c0f40a5c9c3363f4a534 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 9 Oct 2025 09:57:53 -0500 Subject: [PATCH 013/105] update schema Signed-off-by: Sean P. 
Goggins --- .../versions/34_add_contrib_to_config.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 augur/application/schema/alembic/versions/34_add_contrib_to_config.py diff --git a/augur/application/schema/alembic/versions/34_add_contrib_to_config.py b/augur/application/schema/alembic/versions/34_add_contrib_to_config.py new file mode 100644 index 0000000000..273265be2f --- /dev/null +++ b/augur/application/schema/alembic/versions/34_add_contrib_to_config.py @@ -0,0 +1,57 @@ +"""Add extra celery options to the config if they do not exist + +Revision ID: 34 +Revises: 33 +Create Date: 2025-10-09 12:03:57.171011 + +""" +from alembic import op +from augur.application.db.session import DatabaseSession +from augur.application.config import * +from sqlalchemy.sql import text +import logging + +# revision identifiers, used by Alembic. +revision = '34' +down_revision = '33' +branch_labels = None +depends_on = None + +logger = logging.getLogger(__name__) + +def upgrade(): + + with DatabaseSession(logger) as session: + config = AugurConfig(logger,session) + config_dict = config.load_config() + + #Update the missing fields of the facade section in the config + section = config_dict.get("Facade") + + #Just copy the default if section doesn't exist. 
+ if section: + if 'facade_contributor_full_recollect' not in section.keys(): + section['facade_contributor_full_recollect'] = 0 + + else: + section = config.default_config["Facade"] + + config.add_section_from_json("Facade", section) + + +def downgrade(): + + conn = op.get_bind() + + conn.execute(text(f""" + DELETE FROM augur_operations.config + WHERE section_name='Facade' AND (setting_name='facade_contributor_full_recollect'); + """)) + + try: + conn.execute(text(f""" + DELETE FROM augur_operations.config + WHERE section_name='Facade' AND (setting_name='facade_contributor_full_recollect'); + """)) + except: + pass \ No newline at end of file From 512ff818eedc8fee1deb627af1cc2c515aea0212 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 9 Oct 2025 10:26:16 -0500 Subject: [PATCH 014/105] fixing description Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/34_add_contrib_to_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/schema/alembic/versions/34_add_contrib_to_config.py b/augur/application/schema/alembic/versions/34_add_contrib_to_config.py index 273265be2f..1a87be365e 100644 --- a/augur/application/schema/alembic/versions/34_add_contrib_to_config.py +++ b/augur/application/schema/alembic/versions/34_add_contrib_to_config.py @@ -1,4 +1,4 @@ -"""Add extra celery options to the config if they do not exist +"""Add Facade contributor full recollect to config, default to off (0) Revision ID: 34 Revises: 33 From 7f58fbc8075d228c9d427f86e577af31e4a49520 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Mon, 13 Oct 2025 12:24:50 -0500 Subject: [PATCH 015/105] Update date-released format in CITATION.cff Year not recognized without month and date. Signed-off-by: Sean P. 
Goggins --- CITATION.cff | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CITATION.cff b/CITATION.cff index e26f3d8a86..01514fb22f 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -10,5 +10,5 @@ authors: given-names: Matt title: "Open Source Community Health: Analytical Metrics and Their Corresponding Narratives" doi: 10.1109/SoHeal52568.2021.00010 -date-released: 2021 +date-released: 2021-01-01 url: https://www.seangoggins.net/wp-content/plugins/zotpress/lib/request/request.dl.php?api_user_id=655145&dlkey=HNG22ZSU&content_type=application/pdf From e4a618f50951f82488623ec1c5c4405e0df6235b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 14 Oct 2025 20:46:44 +0100 Subject: [PATCH 016/105] Log facade messages based on the type requested Signed-off-by: Adrian Edwards --- .../tasks/git/util/facade_worker/facade_worker/config.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index 488cdac3f4..f060b34390 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -167,7 +167,13 @@ def log_activity(self, level, status): # Log an activity based upon urgency and user's preference. If the log level is # "Debug", then just print it and don't save it in the database. 
log_options = ('Error','Quiet','Info','Verbose','Debug') - self.logger.info(f"* {status}\n") + logmsg = f"* {status}\n" + if level == "Error": + self.logger.error(logmsg) + elif level == "Debug" or level == "Verbose": + self.logger.debug(logmsg) + else: + self.logger.info(logmsg) #Return if only debug if level == 'Debug': From 16fb4a038bedecc2f33322980d989a4457cc4138 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= <765740+giordano@users.noreply.github.com> Date: Wed, 15 Oct 2025 11:57:27 +0200 Subject: [PATCH 017/105] Fix `git reset` command to include remote default branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `git reset --hard ` isn't a correct command, the argument of `git reset` must be a tree-ish, which a remote name alone isn't. In some cases this command fails with ``` fatal: ambiguous argument 'origin': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git [...] 
-- [...]' ``` Signed-off-by: Mosè Giordano <765740+giordano@users.noreply.github.com> --- augur/tasks/git/util/facade_worker/facade_worker/repofetch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py index 874f338902..f754f4e098 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -435,7 +435,7 @@ def git_repo_updates(facade_helper, repo_git): cmdpull2 = (f"git -C {absolute_path} pull") - cmd_reset = (f"git -C {absolute_path} reset --hard origin") + cmd_reset = (f"git -C {absolute_path} reset --hard origin/{remotedefault}") cmd_reset_wait = subprocess.Popen( [cmd_reset], shell=True).wait() From 9ebc4eb08ba14dc26ea20d9b56a74e87b0189508 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 15 Sep 2025 13:11:06 -0400 Subject: [PATCH 018/105] refactor send_messages to remove a almost entirely duplicate code path Signed-off-by: Adrian Edwards --- augur/tasks/util/collection_util.py | 62 ++++++++++++++--------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index bed73bd120..28489d63c8 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -597,37 +597,33 @@ def send_messages(self): for repo_git, full_collection in col_hook.repo_list: repo = get_repo_by_repo_git(repo_git) + platform_name = "github" + # this needs to be here and not up a level since it should be set/reset for each repo. + # otherwise a gitlab repo would reset it and cause subsequent github repos to use gitlab phases. + phases = None if "github" in repo.repo_git: - augur_collection_sequence = [] - for job in col_hook.phases: - #Add the phase to the sequence in order as a celery task. 
- #The preliminary task creates the larger task chain - augur_collection_sequence.append(job(repo_git, full_collection)) - - #augur_collection_sequence.append(core_task_success_util.si(repo_git)) - #Link all phases in a chain and send to celery - augur_collection_chain = chain(*augur_collection_sequence) - task_id = augur_collection_chain.apply_async().task_id - - self.logger.info(f"Setting github repo {col_hook.name} status to collecting for repo: {repo_git}") - - #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated - yield repo_git, task_id, col_hook.name - else: - if col_hook.gitlab_phases is not None: - - augur_collection_sequence = [] - for job in col_hook.gitlab_phases: - #Add the phase to the sequence in order as a celery task. - #The preliminary task creates the larger task chain - augur_collection_sequence.append(job(repo_git, full_collection)) - - #augur_collection_sequence.append(core_task_success_util.si(repo_git)) - #Link all phases in a chain and send to celery - augur_collection_chain = chain(*augur_collection_sequence) - task_id = augur_collection_chain.apply_async().task_id - - self.logger.info(f"Setting gitlab repo {col_hook.name} status to collecting for repo: {repo_git}") - - #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated - yield repo_git, task_id, col_hook.name + phases = col_hook.phases + # use default platform name + + elif "gitlab" in repo.repo_git: + platform_name = "gitlab" + if col_hook.gitlab_phases is None: + continue + phases = col_hook.gitlab_phases + + augur_collection_sequence = [] + for job in phases: + #Add the phase to the sequence in order as a celery task. 
+ #The preliminary task creates the larger task chain + augur_collection_sequence.append(job(repo_git, full_collection)) + + #augur_collection_sequence.append(core_task_success_util.si(repo_git)) + #Link all phases in a chain and send to celery + augur_collection_chain = chain(*augur_collection_sequence) + task_id = augur_collection_chain.apply_async().task_id + + self.logger.info(f"Setting {platform_name} repo {col_hook.name} status to collecting for repo: {repo_git}") + + #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated + yield repo_git, task_id, col_hook.name + From b1c04dc5cf701ab2e873f8f5b2d1d481073be440 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 30 Sep 2025 19:41:33 -0400 Subject: [PATCH 019/105] remove from template Signed-off-by: Adrian Edwards --- augur/templates/repo-info.j2 | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/augur/templates/repo-info.j2 b/augur/templates/repo-info.j2 index 311daa45f7..2738d70e2a 100644 --- a/augur/templates/repo-info.j2 +++ b/augur/templates/repo-info.j2 @@ -5,21 +5,7 @@ {% if repo.repo_id %}

Report for: {{ repo.repo_name|title }}

{{ repo.repo_git }}

- {% for report in reports %} -

{{ report|replace("_", " ")|title }}

- {% for image in images[report] %} -
-
-
-
-
- -
-
- {% endfor %} - {% endfor %} + {% else %}

Repository {{ repo_id }} not found

{% endif %} From 9a92e17b18b550dd5febf693ae311af2355b4e8d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 30 Sep 2025 19:41:49 -0400 Subject: [PATCH 020/105] remove from main route Signed-off-by: Adrian Edwards --- augur/api/view/routes.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py index 00d456733f..91d23531b4 100644 --- a/augur/api/view/routes.py +++ b/augur/api/view/routes.py @@ -221,13 +221,9 @@ def user_settings(): """ @app.route('/repos/views/repo/') def repo_repo_view(id): - # For some reason, there is no reports definition (shouldn't be possible) - if reports is None: - return render_message("Report Definitions Missing", "You requested a report for a repo on this instance, but a definition for the report layout was not found.") - repo = Repo.get_by_id(db_session, id) - return render_module("repo-info", reports=reports.keys(), images=reports, title="Repo", repo=repo, repo_id=id) + return render_module("repo-info", title="Repo", repo=repo, repo_id=id) """ ---------------------------------------------------------------- default: From f07aa9288282c2c97466f2985371e70b846af184 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 30 Sep 2025 19:41:56 -0400 Subject: [PATCH 021/105] remove the report routes Signed-off-by: Adrian Edwards --- augur/api/routes/__init__.py | 2 - augur/api/routes/contributor_reports.py | 1284 --------------- augur/api/routes/pull_request_reports.py | 1922 ---------------------- 3 files changed, 3208 deletions(-) delete mode 100644 augur/api/routes/contributor_reports.py delete mode 100644 augur/api/routes/pull_request_reports.py diff --git a/augur/api/routes/__init__.py b/augur/api/routes/__init__.py index 03c2e2fa71..8176dad94b 100644 --- a/augur/api/routes/__init__.py +++ b/augur/api/routes/__init__.py @@ -4,10 +4,8 @@ from .batch import * from .collection_status import * from .config import * -from .contributor_reports import * from .manager 
import * from .nonstandard_metrics import * -from .pull_request_reports import * from .user import * from .dei import * from .util import * diff --git a/augur/api/routes/contributor_reports.py b/augur/api/routes/contributor_reports.py deleted file mode 100644 index 6c107ed603..0000000000 --- a/augur/api/routes/contributor_reports.py +++ /dev/null @@ -1,1284 +0,0 @@ -import psycopg2 -import psycopg2 -import sqlalchemy as salc -import numpy as np -import warnings -import datetime -import pandas as pd -from math import pi -from flask import request, send_file, Response, current_app - -# import visualization libraries -from bokeh.io import export_png -from bokeh.embed import json_item -from bokeh.plotting import figure -from bokeh.models import Label, LabelSet, ColumnDataSource, Legend -from bokeh.palettes import Colorblind -from bokeh.layouts import gridplot -from bokeh.transform import cumsum - -from augur.api.routes import AUGUR_API_VERSION -from ..server import app - -warnings.filterwarnings('ignore') - -def quarters(month, year): - if 1 <= month <= 3: - return '01' + '/' + year - elif 4 <= month <= 6: - return '04' + '/' + year - elif 5 <= month <= 9: - return '07' + '/' + year - elif 10 <= month <= 12: - return '10' + '/' + year - -def new_contributor_data_collection(repo_id, required_contributions): - - rank_list = [] - for num in range(1, required_contributions + 1): - rank_list.append(num) - rank_tuple = tuple(rank_list) - -##### - -## Commented out due to abuse. 
- -##### - - - # contributor_query = salc.sql.text(f""" - - # SELECT * FROM ( - # SELECT ID AS - # cntrb_id, - # A.created_at AS created_at, - # date_part('month', A.created_at::DATE) AS month, - # date_part('year', A.created_at::DATE) AS year, - # A.repo_id, - # repo_name, - # full_name, - # login, - # ACTION, - # rank() OVER ( - # PARTITION BY id - # ORDER BY A.created_at ASC - # ) - # FROM - # ( - # ( - # SELECT - # canonical_id AS ID, - # created_at AS created_at, - # repo_id, - # 'issue_opened' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # augur_data.issues - # LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = issues.reporter_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, - # cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # repo_id = {repo_id} - # AND pull_request IS NULL - # GROUP BY - # canonical_id, - # repo_id, - # issues.created_at, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # canonical_id AS ID, - # TO_TIMESTAMP( cmt_author_date, 'YYYY-MM-DD' ) AS created_at, - # repo_id, - # 'commit' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # augur_data.commits - # LEFT OUTER JOIN augur_data.contributors ON cntrb_email = cmt_author_email - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email 
=contributors.cntrb_canonical - # WHERE - # repo_id = {repo_id} - # GROUP BY - # repo_id, - # canonical_email, - # canonical_id, - # commits.cmt_author_date, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # message.cntrb_id AS ID, - # created_at AS created_at, - # commits.repo_id, - # 'commit_comment' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - - # FROM - # augur_data.commit_comment_ref, - # augur_data.commits, - # augur_data.message - # LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # commits.cmt_id = commit_comment_ref.cmt_id - # AND commits.repo_id = {repo_id} - # AND commit_comment_ref.msg_id = message.msg_id - - # GROUP BY - # ID, - # commits.repo_id, - # commit_comment_ref.created_at, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # issue_events.cntrb_id AS ID, - # issue_events.created_at AS created_at, - # issues.repo_id, - # 'issue_closed' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # augur_data.issues, - # augur_data.issue_events - # LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = issue_events.cntrb_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, - # cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON 
canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # issues.repo_id = {repo_id} - # AND issues.issue_id = issue_events.issue_id - # AND issues.pull_request IS NULL - # AND issue_events.cntrb_id IS NOT NULL - # AND ACTION = 'closed' - # GROUP BY - # issue_events.cntrb_id, - # issues.repo_id, - # issue_events.created_at, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # pr_augur_contributor_id AS ID, - # pr_created_at AS created_at, - # pull_requests.repo_id, - # 'open_pull_request' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # augur_data.pull_requests - # LEFT OUTER JOIN augur_data.contributors ON pull_requests.pr_augur_contributor_id = contributors.cntrb_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, - # cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # pull_requests.repo_id = {repo_id} - # GROUP BY - # pull_requests.pr_augur_contributor_id, - # pull_requests.repo_id, - # pull_requests.pr_created_at, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # message.cntrb_id AS ID, - # msg_timestamp AS created_at, - # pull_requests.repo_id as repo_id, - # 'pull_request_comment' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # augur_data.pull_requests, - # augur_data.pull_request_message_ref, - # augur_data.message - # LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, 
- # cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # pull_requests.repo_id = {repo_id} - # AND pull_request_message_ref.pull_request_id = pull_requests.pull_request_id - # AND pull_request_message_ref.msg_id = message.msg_id - # GROUP BY - # message.cntrb_id, - # pull_requests.repo_id, - # message.msg_timestamp, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # issues.reporter_id AS ID, - # msg_timestamp AS created_at, - # issues.repo_id as repo_id, - # 'issue_comment' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # issues, - # issue_message_ref, - # message - # LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, - # cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # issues.repo_id = {repo_id} - # AND issue_message_ref.msg_id = message.msg_id - # AND issues.issue_id = issue_message_ref.issue_id - # AND issues.pull_request_id = NULL - # GROUP BY - # issues.reporter_id, - # issues.repo_id, - # message.msg_timestamp, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) - # ) A, - # repo - # WHERE - # ID IS NOT NULL - # AND A.repo_id = repo.repo_id - # GROUP BY - # A.ID, - # A.repo_id, - # A.ACTION, - # A.created_at, - # repo.repo_name, - # A.full_name, - # A.login - # ORDER BY - # cntrb_id - # ) b - # WHERE RANK IN {rank_tuple} - - # """) - # contributor_query2 = (""" - - # select count(*) from 
augur_data.repo; - # """) - - #with current_app.engine.connect() as conn: - # df = pd.read_sql(contributor_query2, conn) - - #df = df.loc[~df['full_name'].str.contains('bot', na=False)] - #df = df.loc[~df['login'].str.contains('bot', na=False)] - - #df = df.loc[~df['cntrb_id'].isin(df[df.duplicated(['cntrb_id', 'created_at', 'repo_id', 'rank'])]['cntrb_id'])] - - # add yearmonths to contributor - #df[['month', 'year']] = df[['month', 'year']].astype(int).astype(str) - #df['yearmonth'] = df['month'] + '/' + df['year'] - #df['yearmonth'] = pd.to_datetime(df['yearmonth']) - - # add column with every value being one, so when the contributor df is concatenated - # with the months df, the filler months won't be counted in the sums - #df['new_contributors'] = 1 - - # add quarters to contributor dataframe - #df['month'] = df['month'].astype(int) - #df['quarter'] = df.apply(lambda x: quarters(x['month'], x['year']), axis=1, result_type='reduce') - #df['quarter'] = pd.to_datetime(df['quarter']) - - df = [1] - return df - -def months_data_collection(start_date, end_date): - - # months_query makes a df of years and months, this is used to fill - # the months with no data in the visualizations - months_query = salc.sql.text(f""" - SELECT * - FROM - ( - SELECT - date_part( 'year', created_month :: DATE ) AS year, - date_part( 'month', created_month :: DATE ) AS MONTH - FROM - (SELECT * - FROM ( - SELECT created_month :: DATE - FROM generate_series (TIMESTAMP '{start_date}', TIMESTAMP '{end_date}', INTERVAL '1 month' ) created_month ) d ) x - ) y - """) - - with current_app.engine.connect() as conn: - months_df = pd.read_sql(months_query, conn) - - # add yearmonths to months_df - months_df[['year', 'month']] = months_df[['year', 'month']].astype(float).astype(int).astype(str) - months_df['yearmonth'] = months_df['month'] + '/' + months_df['year'] - months_df['yearmonth'] = pd.to_datetime(months_df['yearmonth']) - - # filter months_df with start_date and end_date, the contributor 
df is filtered in the visualizations - months_df = months_df.set_index(months_df['yearmonth']) - months_df = months_df.loc[start_date: end_date].reset_index(drop=True) - - # add quarters to months dataframe - months_df['month'] = months_df['month'].astype(int) - months_df['quarter'] = months_df.apply(lambda x: quarters(x['month'], x['year']), axis=1) - months_df['quarter'] = pd.to_datetime(months_df['quarter']) - - return months_df - -def get_repo_id_start_date_and_end_date(): - - now = datetime.datetime.now() - - repo_id = request.args.get('repo_id') - start_date = str(request.args.get('start_date', "{}-01-01".format(now.year - 1))) - end_date = str(request.args.get('end_date', "{}-{}-{}".format(now.year, now.month, now.day))) - - if repo_id: - - if start_date < end_date: - return int(repo_id), start_date, end_date, None - else: - - error = { - "message": "Invalid end_date. end_date is before the start_date", - "status_code": 400 - } - - return int(repo_id), None, None, error - - else: - error = { - "message": "repo_id not specified. 
Use this endpoint to get a list of available repos: http:///api/unstable/repos", - "status_code": 400 - } - return None, None, None, error - -def filter_out_repeats_without_required_contributions_in_required_time(repeat_list, repeats_df, required_time, - first_list): - - differences = [] - for i in range(0, len(repeat_list)): - time_difference = repeat_list[i] - first_list[i] - total = time_difference.days * 86400 + time_difference.seconds - differences.append(total) - repeats_df['differences'] = differences - - # remove contributions who made enough contributions, but not in a short enough time - repeats_df = repeats_df.loc[repeats_df['differences'] <= required_time * 86400] - - return repeats_df - -def compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, required_time, start_date): - - # create a copy of contributor dataframe - driver_df = input_df.copy() - - # remove first time contributors before begin date, along with their second contribution - mask = (driver_df['yearmonth'] < start_date) - driver_df = driver_df[~driver_df['cntrb_id'].isin(driver_df.loc[mask]['cntrb_id'])] - - # determine if contributor is a drive by by finding all the cntrb_id's that do not have a second contribution - repeats_df = driver_df.copy() - - repeats_df = repeats_df.loc[repeats_df['rank'].isin([1, required_contributions])] - - # removes all the contributors that only have a first contirbution - repeats_df = repeats_df[ - repeats_df['cntrb_id'].isin(repeats_df.loc[driver_df['rank'] == required_contributions]['cntrb_id'])] - - repeat_list = repeats_df.loc[driver_df['rank'] == required_contributions]['created_at'].tolist() - first_list = repeats_df.loc[driver_df['rank'] == 1]['created_at'].tolist() - - repeats_df = repeats_df.loc[driver_df['rank'] == 1] - repeats_df['type'] = 'repeat' - - repeats_df = filter_out_repeats_without_required_contributions_in_required_time( - repeat_list, repeats_df, required_time, first_list) - - repeats_df = 
repeats_df.loc[repeats_df['differences'] <= required_time * 86400] - - repeat_cntrb_ids = repeats_df['cntrb_id'].to_list() - - drive_by_df = driver_df.loc[~driver_df['cntrb_id'].isin(repeat_cntrb_ids)] - - drive_by_df = drive_by_df.loc[driver_df['rank'] == 1] - drive_by_df['type'] = 'drive_by' - - return drive_by_df, repeats_df - -def add_caption_to_visualizations(caption, required_contributions, required_time, plot_width): - - caption_plot = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - - caption_plot.add_layout(Label( - x=0, - y=160, - x_units='screen', - y_units='screen', - text='{}'.format(caption.format(required_contributions, required_time)), - text_font='times', - text_font_size='15pt', - render_mode='css' - )) - caption_plot.outline_line_color = None - - return caption_plot - -def format_new_cntrb_bar_charts(plot, rank, group_by_format_string): - - plot.xgrid.grid_line_color = None - plot.y_range.start = 0 - plot.axis.minor_tick_line_color = None - plot.outline_line_color = None - - plot.title.align = "center" - plot.title.text_font_size = "18px" - - plot.yaxis.axis_label = 'Second Time Contributors' if rank == 2 else 'New Contributors' - plot.xaxis.axis_label = group_by_format_string - - plot.xaxis.axis_label_text_font_size = "18px" - plot.yaxis.axis_label_text_font_size = "16px" - - plot.xaxis.major_label_text_font_size = "16px" - plot.xaxis.major_label_orientation = 45.0 - - plot.yaxis.major_label_text_font_size = "16px" - - return plot - -def add_charts_and_captions_to_correct_positions(chart_plot, caption_plot, rank, contributor_type, - row_1, row_2, row_3, row_4): - - if rank == 1 and (contributor_type == 'All' or contributor_type == 'repeat'): - row_1.append(chart_plot) - row_2.append(caption_plot) - elif rank == 2 or contributor_type == 'drive_by': - row_3.append(chart_plot) - row_4.append(caption_plot) - -def get_new_cntrb_bar_chart_query_params(): - - group_by = str(request.args.get('group_by', "quarter")) - required_contributions = 
int(request.args.get('required_contributions', 4)) - required_time = int(request.args.get('required_time', 365)) - - return group_by, required_contributions, required_time - -def remove_rows_before_start_date(df, start_date): - - mask = (df['yearmonth'] < start_date) - result_df = df[~df['cntrb_id'].isin(df.loc[mask]['cntrb_id'])] - - return result_df - -def remove_rows_with_null_values(df, not_null_columns=[]): - """Remove null data from pandas df - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- list_of_columns - description: columns that are searched for NULL values - type: list - default: [] (means all columns will be checked for NULL values) - IMPORTANT: if an empty list is passed or nothing is passed it will check all columns for NULL values - - Return Value - -- Modified Pandas Dataframe - """ - - if len(not_null_columns) == 0: - not_null_columns = df.columns.to_list() - - total_rows_removed = 0 - for col in not_null_columns: - rows_removed = len(df.loc[df[col].isnull() is True]) - - if rows_removed > 0: - print(f"{rows_removed} rows have been removed because of null values in column {col}") - total_rows_removed += rows_removed - - df = df.loc[df[col].isnull() is False] - - if total_rows_removed > 0: - print(f"\nTotal rows removed because of null data: {total_rows_removed}"); - else: - print("No null data found") - - return df - -def get_needed_columns(df, list_of_columns): - """Get only a specific list of columns from a Pandas Dataframe - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- list_of_columns - description: columns that will be kept in dataframe - type: list - - Return Value - -- Modified Pandas Dataframe - """ - return df[list_of_columns] - -def filter_data(df, needed_columns, not_null_columns=[]): - """Filters out the unneeded rows in the df, and removed NULL data from df - - Parameters - -- df - description: the dataframe that will be 
modified - type: Pandas Dataframe - - -- needed_columns - description: the columns to keep in the dataframe - - -- not_null_columns - description: columns that will be searched for NULL data, - if NULL values are found those rows will be removed - default: [] (means all columns in needed_columns list will be checked for NULL values) - IMPORTANT: if an empty list is passed or nothing is passed it will check - all columns in needed_columns list for NULL values - Return Value - -- Modified Pandas Dataframe - """ - - if all(x in needed_columns for x in not_null_columns): - - df = get_needed_columns(df, needed_columns) - #Use the pandas method bc the other method was erroring on boolean index. - #IM - 9/23/22 - df = df.dropna(subset=not_null_columns)#remove_rows_with_null_values(df, not_null_columns) - - return df - else: - print("Developer error, not null columns should be a subset of needed columns") - return df - -@app.route('/{}/contributor_reports/new_contributors_bar/'.format(AUGUR_API_VERSION), methods=["GET"]) -def new_contributors_bar(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by, required_contributions, required_time = get_new_cntrb_bar_chart_query_params() - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - months_df = months_data_collection(start_date=start_date, end_date=end_date) - - # TODO remove full_name from data for all charts since it is not needed in vis generation - not_null_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - #Use the pandas method bc the other method was erroring on boolean index. 
- #IM - 9/23/22 - input_df = input_df.dropna(subset=not_null_columns)#remove_rows_with_null_values(input_df, not_null_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - contributor_types = ['All', 'repeat', 'drive_by'] - ranks = [1, 2] - - row_1, row_2, row_3, row_4 = [], [], [], [] - - all_df = remove_rows_before_start_date(input_df, start_date) - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - for rank in ranks: - for contributor_type in contributor_types: - - # do not display these visualizations since drive-by's do not have second contributions, and the - # second contribution of a repeat contributor is the same thing as the all the second time contributors - if (rank == 2 and contributor_type == 'drive_by') or (rank == 2 and contributor_type == 'repeat'): - continue - - if contributor_type == 'repeat': - driver_df = repeats_df - - caption = """This graph shows repeat contributors in the specified time period. Repeat contributors - are contributors who have made {} or more contributions in {} days and their first contribution is - in the specified time period. New contributors are individuals who make their first contribution - in the specified time period.""" - - elif contributor_type == 'drive_by': - - driver_df = drive_by_df - - caption = """This graph shows fly by contributors in the specified time period. Fly by contributors - are contributors who make less than the required {} contributions in {} days. New contributors are - individuals who make their first contribution in the specified time period. Of course, then, “All - fly-by’s are by definition first time contributors”. 
However, not all first time contributors are - fly-by’s.""" - - elif contributor_type == 'All': - - if rank == 1: - driver_df = all_df - # makes df with all first time contributors - driver_df = driver_df.loc[driver_df['rank'] == 1] - caption = """This graph shows all the first time contributors, whether they contribute once, or - contribute multiple times. New contributors are individuals who make their first contribution - in the specified time period.""" - - if rank == 2: - - driver_df = all_df - - # creates df with all second time contributors - driver_df = driver_df.loc[driver_df['rank'] == 2] - caption = """This graph shows the second contribution of all - first time contributors in the specified time period.""" - # y_axis_label = 'Second Time Contributors' - - # filter by end_date, this is not done with the begin date filtering because a repeat contributor - # will look like drive-by if the second contribution is removed by end_date filtering - mask = (driver_df['yearmonth'] < end_date) - driver_df = driver_df.loc[mask] - - # adds all months to driver_df so the lists of dates will include all months and years - driver_df = pd.concat([driver_df, months_df]) - - data = pd.DataFrame() - if group_by == 'year': - - data['dates'] = driver_df[group_by].unique() - - # new contributor counts for y-axis - data['new_contributor_counts'] = driver_df.groupby([group_by]).sum().reset_index()[ - 'new_contributors'] - - # used to format x-axis and title - group_by_format_string = "Year" - - elif group_by == 'quarter' or group_by == 'month': - - # set variables to group the data by quarter or month - if group_by == 'quarter': - date_column = 'quarter' - group_by_format_string = "Quarter" - - elif group_by == 'month': - date_column = 'yearmonth' - group_by_format_string = "Month" - - # modifies the driver_df[date_column] to be a string with year and month, - # then finds all the unique values - data['dates'] = np.unique(np.datetime_as_string(driver_df[date_column], unit='M')) 
- - # new contributor counts for y-axis - data['new_contributor_counts'] = driver_df.groupby([date_column]).sum().reset_index()[ - 'new_contributors'] - - # if the data set is large enough it will dynamically assign the width, if the data set is - # too small it will by default set to 870 pixel so the title fits - if len(data['new_contributor_counts']) >= 15: - plot_width = 46 * len(data['new_contributor_counts']) - else: - plot_width = 870 - - # create a dict convert an integer number into a word - # used to turn the rank into a word, so it is nicely displayed in the title - numbers = ['Zero', 'First', 'Second'] - num_conversion_dict = {} - for i in range(1, len(numbers)): - num_conversion_dict[i] = numbers[i] - number = '{}'.format(num_conversion_dict[rank]) - - # define pot for bar chart - p = figure(x_range=data['dates'], plot_height=400, plot_width=plot_width, - title="{}: {} {} Time Contributors Per {}".format(repo_dict[repo_id], - contributor_type.capitalize(), number, - group_by_format_string), - y_range=(0, max(data['new_contributor_counts']) * 1.15), margin=(0, 0, 10, 0)) - - p.vbar(x=data['dates'], top=data['new_contributor_counts'], width=0.8) - - source = ColumnDataSource( - data=dict(dates=data['dates'], new_contributor_counts=data['new_contributor_counts'])) - - # add contributor_count labels to chart - p.add_layout(LabelSet(x='dates', y='new_contributor_counts', text='new_contributor_counts', y_offset=4, - text_font_size="13pt", text_color="black", - source=source, text_align='center')) - - plot = format_new_cntrb_bar_charts(p, rank, group_by_format_string) - - caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width) - - add_charts_and_captions_to_correct_positions(plot, caption_plot, rank, contributor_type, row_1, - row_2, row_3, row_4) - - # puts plots together into a grid - grid = gridplot([row_1, row_2, row_3, row_4]) - - filename = export_png(grid) - - return send_file(filename) - 
-@app.route('/{}/contributor_reports/new_contributors_stacked_bar/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def new_contributors_stacked_bar(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by, required_contributions, required_time = get_new_cntrb_bar_chart_query_params() - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - months_df = months_data_collection(start_date=start_date, end_date=end_date) - - needed_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - contributor_types = ['All', 'repeat', 'drive_by'] - ranks = [1, 2] - - row_1, row_2, row_3, row_4 = [], [], [], [] - - all_df = remove_rows_before_start_date(input_df, start_date) - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - for rank in ranks: - for contributor_type in contributor_types: - # do not display these visualizations since drive-by's do not have second contributions, - # and the second contribution of a repeat contributor is the same thing as the all the - # second time contributors - if (rank == 2 and contributor_type == 'drive_by') or (rank == 2 and contributor_type == 'repeat'): - continue - - if contributor_type == 'repeat': - driver_df = repeats_df - - caption = """This graph shows repeat contributors in the specified time period. 
Repeat contributors - are contributors who have made {} or more contributions in {} days and their first contribution is - in the specified time period. New contributors are individuals who make their first contribution in - the specified time period.""" - - elif contributor_type == 'drive_by': - - driver_df = drive_by_df - - caption = """This graph shows fly by contributors in the specified time period. Fly by contributors - are contributors who make less than the required {} contributions in {} days. New contributors are - individuals who make their first contribution in the specified time period. Of course, then, “All - fly-by’s are by definition first time contributors”. However, not all first time contributors are - fly-by’s.""" - - elif contributor_type == 'All': - if rank == 1: - driver_df = all_df - - # makes df with all first time contributors - driver_df = driver_df.loc[driver_df['rank'] == 1] - - caption = """This graph shows all the first time contributors, whether they contribute once, or - contribute multiple times. 
New contributors are individuals who make their first contribution in - the specified time period.""" - - if rank == 2: - driver_df = all_df - - # creates df with all second time contributor - driver_df = driver_df.loc[driver_df['rank'] == 2] - caption = """This graph shows the second contribution of all first time - contributors in the specified time period.""" - # y_axis_label = 'Second Time Contributors' - - # filter by end_date, this is not done with the begin date filtering because a repeat contributor will - # look like drive-by if the second contribution is removed by end_date filtering - mask = (driver_df['yearmonth'] < end_date) - driver_df = driver_df.loc[mask] - - # adds all months to driver_df so the lists of dates will include all months and years - driver_df = pd.concat([driver_df, months_df]) - - actions = ['open_pull_request', 'pull_request_comment', 'commit', 'issue_closed', 'issue_opened', - 'issue_comment'] - - data = pd.DataFrame() - if group_by == 'year': - - # x-axis dates - data['dates'] = driver_df[group_by].unique() - - for contribution_type in actions: - data[contribution_type] = \ - pd.concat([driver_df.loc[driver_df['action'] == contribution_type], months_df]).groupby( - group_by).sum().reset_index()['new_contributors'] - - # new contributor counts for all actions - data['new_contributor_counts'] = driver_df.groupby([group_by]).sum().reset_index()[ - 'new_contributors'] - - # used to format x-axis and graph title - group_by_format_string = "Year" - - elif group_by == 'quarter' or group_by == 'month': - - # set variables to group the data by quarter or month - if group_by == 'quarter': - date_column = 'quarter' - group_by_format_string = "Quarter" - - elif group_by == 'month': - date_column = 'yearmonth' - group_by_format_string = "Month" - - # modifies the driver_df[date_column] to be a string with year and month, - # then finds all the unique values - data['dates'] = np.unique(np.datetime_as_string(driver_df[date_column], unit='M')) - - 
# new_contributor counts for each type of action - for contribution_type in actions: - data[contribution_type] = \ - pd.concat([driver_df.loc[driver_df['action'] == contribution_type], months_df]).groupby( - date_column).sum().reset_index()['new_contributors'] - - print(data.to_string()) - - # new contributor counts for all actions - data['new_contributor_counts'] = driver_df.groupby([date_column]).sum().reset_index()[ - 'new_contributors'] - - # if the data set is large enough it will dynamically assign the width, if the data set is too small it - # will by default set to 870 pixel so the title fits - if len(data['new_contributor_counts']) >= 15: - plot_width = 46 * len(data['new_contributor_counts']) + 200 - else: - plot_width = 870 - - # create list of values for data source dict - actions_df_references = [] - for action in actions: - actions_df_references.append(data[action]) - - # created dict with the actions as the keys, and the values as the values from the df - data_source = {actions[i]: actions_df_references[i] for i in range(len(actions))} - data_source.update({'dates': data['dates'], 'New Contributor Counts': data['new_contributor_counts']}) - - colors = Colorblind[len(actions)] - - source = ColumnDataSource(data=data_source) - - # create a dict convert an integer number into a word - # used to turn the rank into a word, so it is nicely displayed in the title - numbers = ['Zero', 'First', 'Second'] - num_conversion_dict = {} - for i in range(1, len(numbers)): - num_conversion_dict[i] = numbers[i] - number = '{}'.format(num_conversion_dict[rank]) - - # y_max = 20 - # creates plot to hold chart - p = figure(x_range=data['dates'], plot_height=400, plot_width=plot_width, - title='{}: {} {} Time Contributors Per {}'.format(repo_dict[repo_id], - contributor_type.capitalize(), number, - group_by_format_string), - toolbar_location=None, y_range=(0, max(data['new_contributor_counts']) * 1.15)) - # max(data['new_contributor_counts'])* 1.15), margin = (0, 0, 0, 
0)) - - vbar = p.vbar_stack(actions, x='dates', width=0.8, color=colors, source=source) - - # add total count labels - p.add_layout(LabelSet(x='dates', y='New Contributor Counts', text='New Contributor Counts', y_offset=4, - text_font_size="14pt", - text_color="black", source=source, text_align='center')) - - # add legend - legend = Legend(items=[(date, [action]) for (date, action) in zip(actions, vbar)], location=(0, 120), - label_text_font_size="16px") - p.add_layout(legend, 'right') - - plot = format_new_cntrb_bar_charts(p, rank, group_by_format_string) - - caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width) - - add_charts_and_captions_to_correct_positions(plot, caption_plot, rank, contributor_type, row_1, - row_2, row_3, row_4) - - # puts plots together into a grid - grid = gridplot([row_1, row_2, row_3, row_4]) - - filename = export_png(grid) - - return send_file(filename) - -@app.route('/{}/contributor_reports/returning_contributors_pie_chart/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def returning_contributors_pie_chart(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - required_contributions = int(request.args.get('required_contributions', 4)) - required_time = int(request.args.get('required_time', 365)) - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - - needed_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: 
input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - print(repeats_df.to_string()) - - driver_df = pd.concat([drive_by_df, repeats_df]) - - # filter df by end date - mask = (driver_df['yearmonth'] < end_date) - driver_df = driver_df.loc[mask] - - # first and second time contributor counts - drive_by_contributors = driver_df.loc[driver_df['type'] == 'drive_by'].count()['new_contributors'] - repeat_contributors = driver_df.loc[driver_df['type'] == 'repeat'].count()['new_contributors'] - - # create a dict with the # of drive-by and repeat contributors - x = {'Drive_By': drive_by_contributors, - 'Repeat': repeat_contributors} - - # turn dict 'x' into a dataframe with columns 'contributor_type', and 'counts' - data = pd.Series(x).reset_index(name='counts').rename(columns={'index': 'contributor_type'}) - - data['angle'] = data['counts'] / data['counts'].sum() * 2 * pi - data['color'] = ('#0072B2', '#E69F00') - data['percentage'] = ((data['angle'] / (2 * pi)) * 100).round(2) - - # format title - title = "{}: Number of Returning " \ - "Contributors out of {} from {} to {}" \ - .format(repo_dict[repo_id], drive_by_contributors + repeat_contributors, start_date, end_date) - - title_text_font_size = 18 - - plot_width = 850 - - # sets plot_width to width of title if title is wider than 850 pixels - if len(title) * title_text_font_size / 2 > plot_width: - plot_width = int(len(title) * title_text_font_size / 2) - - # creates plot for chart - p = figure(plot_height=450, plot_width=plot_width, title=title, - toolbar_location=None, x_range=(-0.5, 1.3), tools='hover', tooltips="@contributor_type", - margin=(0, 0, 0, 0)) - - p.wedge(x=0.87, y=1, radius=0.4, start_angle=cumsum('angle', include_zero=True), - end_angle=cumsum('angle'), line_color=None, fill_color='color', - legend_field='contributor_type', source=data) - - 
start_point = 0.88 - for i in range(0, len(data['percentage'])): - # percentages - p.add_layout(Label(x=-0.17, y=start_point + 0.13 * (len(data['percentage']) - 1 - i), - text='{}%'.format(data.iloc[i]['percentage']), - render_mode='css', text_font_size='15px', text_font_style='bold')) - - # contributors - p.add_layout(Label(x=0.12, y=start_point + 0.13 * (len(data['percentage']) - 1 - i), - text='{}'.format(data.iloc[i]['counts']), - render_mode='css', text_font_size='15px', text_font_style='bold')) - - # percentages header - p.add_layout( - Label(x=-0.22, y=start_point + 0.13 * (len(data['percentage'])), text='Percentages', render_mode='css', - text_font_size='15px', text_font_style='bold')) - - # legend header - p.add_layout( - Label(x=-0.43, y=start_point + 0.13 * (len(data['percentage'])), text='Category', render_mode='css', - text_font_size='15px', text_font_style='bold')) - - # contributors header - p.add_layout( - Label(x=0, y=start_point + 0.13 * (len(data['percentage'])), text='# Contributors', render_mode='css', - text_font_size='15px', text_font_style='bold')) - - p.axis.axis_label = None - p.axis.visible = False - p.grid.grid_line_color = None - - p.title.align = "center" - p.title.text_font_size = "{}px".format(title_text_font_size) - - p.legend.location = "center_left" - p.legend.border_line_color = None - p.legend.label_text_font_style = 'bold' - p.legend.label_text_font_size = "15px" - - plot = p - - caption = """This pie chart shows the percentage of new contributors who were fly-by or repeat contributors. - Fly by contributors are contributors who make less than the required {0} contributions in {1} days. - New contributors are individuals who make their first contribution in the specified time period. 
- Repeat contributors are contributors who have made {0} or more contributions in {1} days and their - first contribution is in the specified time period.""" - - caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width) - - # put graph and caption plot together into one grid - grid = gridplot([[plot], [caption_plot]]) - - filename = export_png(grid) - - return send_file(filename) - -@app.route('/{}/contributor_reports/returning_contributors_stacked_bar/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def returning_contributors_stacked_bar(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by = str(request.args.get('group_by', "quarter")) - required_contributions = int(request.args.get('required_contributions', 4)) - required_time = int(request.args.get('required_time', 365)) - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - months_df = months_data_collection(start_date=start_date, end_date=end_date) - - needed_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - driver_df = pd.concat([drive_by_df, repeats_df, months_df]) - - # filter by end_date - mask = (driver_df['yearmonth'] < end_date) - driver_df = driver_df.loc[mask] - - # create df to hold data 
needed for chart - data = pd.DataFrame() - if group_by == 'year': - - # x-axis dates - data['dates'] = driver_df[group_by].unique() - - data['repeat_counts'] = \ - driver_df.loc[driver_df['type'] == 'repeat'].groupby(group_by).count().reset_index()['new_contributors'] - data['drive_by_counts'] = \ - driver_df.loc[driver_df['type'] == 'drive_by'].groupby(group_by).count().reset_index()[ - 'new_contributors'] - - # new contributor counts for all contributor counts - total_counts = [] - for i in range(0, len(data['drive_by_counts'])): - total_counts.append(data.iloc[i]['drive_by_counts'] + data.iloc[i]['repeat_counts']) - data['total_counts'] = total_counts - - # used to format x-axis and graph title - group_by_format_string = "Year" - - # font size of drive by and repeat labels - label_text_font_size = "14pt" - - elif group_by == 'quarter' or group_by == 'month': - - # set variables to group the data by quarter or month - if group_by == 'quarter': - date_column = 'quarter' - group_by_format_string = "Quarter" - - elif group_by == 'month': - date_column = 'yearmonth' - group_by_format_string = "Month" - - # modifies the driver_df[date_column] to be a string with year and month, then finds all the unique values - data['dates'] = np.unique(np.datetime_as_string(driver_df[date_column], unit='M')) - data['drive_by_counts'] = pd.concat([driver_df.loc[driver_df['type'] == 'drive_by'], months_df]).groupby( - date_column).sum().reset_index()['new_contributors'] - data['repeat_counts'] = pd.concat([driver_df.loc[driver_df['type'] == 'repeat'], months_df]).groupby( - date_column).sum().reset_index()['new_contributors'] - - # new contributor counts for all contributor types - total_counts = [] - for i in range(0, len(data['drive_by_counts'])): - total_counts.append(data.iloc[i]['drive_by_counts'] + data.iloc[i]['repeat_counts']) - data['total_counts'] = total_counts - - # font size of drive by and repeat labels - label_text_font_size = "13pt" - - data_source = {'Dates': 
data['dates'], - 'Fly By': data['drive_by_counts'], - 'Repeat': data['repeat_counts'], - 'All': data['total_counts']} - - groups = ["Fly By", "Repeat"] - - colors = ['#56B4E9', '#E69F00'] - - source = ColumnDataSource(data=data_source) - - # format title - title_text_font_size = 18 - - # if the data set is large enough it will dynamically assign the width, if the data set - # is too small it will by default set to 780 pixel so the title fits - if len(data['total_counts']) >= 13: - plot_width = 46 * len(data['total_counts']) + 210 - else: - plot_width = 780 - - p = figure(x_range=data['dates'], plot_height=500, plot_width=plot_width, - title="{}: Fly By and Repeat Contributor Counts per {}".format(repo_dict[repo_id], - group_by_format_string), - toolbar_location=None, y_range=(0, max(total_counts) * 1.15), margin=(0, 0, 0, 0)) - - vbar = p.vbar_stack(groups, x='Dates', width=0.8, color=colors, source=source) - - # add total counts above bars - p.add_layout(LabelSet(x='Dates', y='All', text='All', y_offset=8, text_font_size="14pt", - text_color="black", source=source, text_align='center')) - - # add drive by count labels - p.add_layout(LabelSet(x='Dates', y='Fly By', text='Fly By', y_offset=-22, text_font_size=label_text_font_size, - text_color="black", source=source, text_align='center')) - - # add repeat count labels - p.add_layout(LabelSet(x='Dates', y='All', text='Repeat', y_offset=-22, text_font_size=label_text_font_size, - text_color="black", source=source, text_align='center')) - - # add legend - legend = Legend(items=[(date, [group]) for (date, group) in zip(groups, vbar)], location=(0, 200), - label_text_font_size="16px") - p.add_layout(legend, 'right') - - p.xgrid.grid_line_color = None - p.y_range.start = 0 - p.axis.minor_tick_line_color = None - p.outline_line_color = None - - p.title.align = "center" - p.title.text_font_size = "{}px".format(title_text_font_size) - - p.yaxis.axis_label = '# Contributors' - p.xaxis.axis_label = group_by_format_string - - 
p.xaxis.axis_label_text_font_size = "18px" - p.yaxis.axis_label_text_font_size = "16px" - - p.xaxis.major_label_text_font_size = "16px" - p.xaxis.major_label_orientation = 45.0 - - p.yaxis.major_label_text_font_size = "16px" - - p.legend.label_text_font_size = "20px" - - plot = p - - caption = """This graph shows the number of new contributors in the specified time period, and indicates how - many were fly-by and repeat contributors. Fly by contributors are contributors who make less than the required - {0} contributions in {1} days. New contributors are individuals who make their first contribution in the - specified time period. Repeat contributors are contributors who have made {0} or more contributions in {1} - days and their first contribution is in the specified time period.""" - - caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width) - - # put graph and caption plot together into one grid - grid = gridplot([[plot], [caption_plot]]) - - filename = export_png(grid) - - return send_file(filename) diff --git a/augur/api/routes/pull_request_reports.py b/augur/api/routes/pull_request_reports.py deleted file mode 100644 index 13aea31e8d..0000000000 --- a/augur/api/routes/pull_request_reports.py +++ /dev/null @@ -1,1922 +0,0 @@ -# import psycopg2 -import pandas as pd -import sqlalchemy as salc -import numpy as np -import warnings -import datetime -import json -# from scipy import stats -from flask import request, send_file, Response, current_app -import math - -from bokeh.palettes import Colorblind, mpl, Category20 -from bokeh.layouts import gridplot, column -from bokeh.models.annotations import Title -from bokeh.io import export_png, show # get_screenshot_as_png -# from bokeh.io.export import get_screenshot_as_png -from bokeh.embed import json_item -from bokeh.models import ColumnDataSource, Legend, LabelSet, Range1d, Label, FactorRange, BasicTicker, ColorBar, \ - LinearColorMapper, PrintfTickFormatter -from 
bokeh.plotting import figure -from bokeh.models.glyphs import Rect -from bokeh.transform import dodge, factor_cmap, transform - -# from selenium.webdriver import Firefox, FirefoxOptions -# options = FirefoxOptions() -# options.headless = True -# webdriver = Firefox(options=options) -#export_png(item, path, webdriver=webdriver) - -warnings.filterwarnings('ignore') - -from augur.api.routes import AUGUR_API_VERSION -from ..server import app - -def pull_request_data_collection(repo_id, start_date, end_date): - - pr_query = salc.sql.text(f""" - SELECT - repo.repo_id AS repo_id, - pull_requests.pr_src_id AS pr_src_id, - repo.repo_name AS repo_name, - pr_src_author_association, - repo_groups.rg_name AS repo_group, - pull_requests.pr_src_state, - pull_requests.pr_merged_at, - pull_requests.pr_created_at AS pr_created_at, - pull_requests.pr_closed_at AS pr_closed_at, - date_part( 'year', pr_created_at :: DATE ) AS CREATED_YEAR, - date_part( 'month', pr_created_at :: DATE ) AS CREATED_MONTH, - date_part( 'year', pr_closed_at :: DATE ) AS CLOSED_YEAR, - date_part( 'month', pr_closed_at :: DATE ) AS CLOSED_MONTH, - pr_src_meta_label, - pr_head_or_base, - ( EXTRACT ( EPOCH FROM pull_requests.pr_closed_at ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 3600 AS hours_to_close, - ( EXTRACT ( EPOCH FROM pull_requests.pr_closed_at ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 86400 AS days_to_close, - ( EXTRACT ( EPOCH FROM first_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 3600 AS hours_to_first_response, - ( EXTRACT ( EPOCH FROM first_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 86400 AS days_to_first_response, - ( EXTRACT ( EPOCH FROM last_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 3600 AS hours_to_last_response, - ( EXTRACT ( EPOCH FROM last_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 86400 AS days_to_last_response, - first_response_time, - 
last_response_time, - EXTRACT ( EPOCH FROM average_time_between_responses), - assigned_count, - review_requested_count, - labeled_count, - subscribed_count, - mentioned_count, - referenced_count, - closed_count, - head_ref_force_pushed_count, - merged_count::INT, - milestoned_count, - unlabeled_count, - head_ref_deleted_count, - comment_count, - COALESCE(lines_added, 0) as lines_added, - COALESCE(lines_removed, 0) as lines_removed, - commit_count, - COALESCE(file_count, 0) as file_count - FROM - repo, - repo_groups, - pull_requests LEFT OUTER JOIN ( - SELECT pull_requests.pull_request_id, - count(*) FILTER (WHERE action = 'assigned') AS assigned_count, - count(*) FILTER (WHERE action = 'review_requested') AS review_requested_count, - count(*) FILTER (WHERE action = 'labeled') AS labeled_count, - count(*) FILTER (WHERE action = 'unlabeled') AS unlabeled_count, - count(*) FILTER (WHERE action = 'subscribed') AS subscribed_count, - count(*) FILTER (WHERE action = 'mentioned') AS mentioned_count, - count(*) FILTER (WHERE action = 'referenced') AS referenced_count, - count(*) FILTER (WHERE action = 'closed') AS closed_count, - count(*) FILTER (WHERE action = 'head_ref_force_pushed') AS head_ref_force_pushed_count, - count(*) FILTER (WHERE action = 'head_ref_deleted') AS head_ref_deleted_count, - count(*) FILTER (WHERE action = 'milestoned') AS milestoned_count, - COALESCE(count(*) FILTER (WHERE action = 'merged'), 0) AS merged_count, - COALESCE(MIN(message.msg_timestamp), pull_requests.pr_merged_at, pull_requests.pr_closed_at) AS first_response_time, - COALESCE(COUNT(DISTINCT message.msg_timestamp), 0) AS comment_count, - COALESCE(MAX(message.msg_timestamp), pull_requests.pr_closed_at) AS last_response_time, - COALESCE((MAX(message.msg_timestamp) - MIN(message.msg_timestamp)) / COUNT(DISTINCT message.msg_timestamp), pull_requests.pr_created_at - pull_requests.pr_closed_at) AS average_time_between_responses - FROM pull_requests - LEFT OUTER JOIN pull_request_events on 
pull_requests.pull_request_id = pull_request_events.pull_request_id - JOIN repo on repo.repo_id = pull_requests.repo_id - LEFT OUTER JOIN pull_request_message_ref on pull_requests.pull_request_id = pull_request_message_ref.pull_request_id - LEFT OUTER JOIN message on pull_request_message_ref.msg_id = message.msg_id - WHERE repo.repo_id = {repo_id} - GROUP BY pull_requests.pull_request_id - ) response_times - ON pull_requests.pull_request_id = response_times.pull_request_id - LEFT JOIN ( - SELECT pull_request_commits.pull_request_id, count(DISTINCT pr_cmt_sha) AS commit_count - FROM pull_request_commits, pull_requests, pull_request_meta - WHERE pull_requests.pull_request_id = pull_request_commits.pull_request_id - AND pull_requests.pull_request_id = pull_request_meta.pull_request_id - AND pull_requests.repo_id = {repo_id} - AND pr_cmt_sha <> pull_requests.pr_merge_commit_sha - AND pr_cmt_sha <> pull_request_meta.pr_sha - GROUP BY pull_request_commits.pull_request_id - ) all_commit_counts - ON pull_requests.pull_request_id = all_commit_counts.pull_request_id - LEFT JOIN ( - SELECT MAX(pr_repo_meta_id), pull_request_meta.pull_request_id, pr_head_or_base, pr_src_meta_label - FROM pull_requests, pull_request_meta - WHERE pull_requests.pull_request_id = pull_request_meta.pull_request_id - AND pull_requests.repo_id = {repo_id} - AND pr_head_or_base = 'base' - GROUP BY pull_request_meta.pull_request_id, pr_head_or_base, pr_src_meta_label - ) base_labels - ON base_labels.pull_request_id = all_commit_counts.pull_request_id - LEFT JOIN ( - SELECT sum(cmt_added) AS lines_added, sum(cmt_removed) AS lines_removed, pull_request_commits.pull_request_id, count(DISTINCT cmt_filename) AS file_count - FROM pull_request_commits, commits, pull_requests, pull_request_meta - WHERE cmt_commit_hash = pr_cmt_sha - AND pull_requests.pull_request_id = pull_request_commits.pull_request_id - AND pull_requests.pull_request_id = pull_request_meta.pull_request_id - AND pull_requests.repo_id = 
{repo_id} - AND commits.repo_id = pull_requests.repo_id - AND commits.cmt_commit_hash <> pull_requests.pr_merge_commit_sha - AND commits.cmt_commit_hash <> pull_request_meta.pr_sha - GROUP BY pull_request_commits.pull_request_id - ) master_merged_counts - ON base_labels.pull_request_id = master_merged_counts.pull_request_id - WHERE - repo.repo_group_id = repo_groups.repo_group_id - AND repo.repo_id = pull_requests.repo_id - AND repo.repo_id = {repo_id} - ORDER BY - merged_count DESC - """) - - with current_app.engine.connect() as conn: - pr_all = pd.read_sql(pr_query, conn) - - pr_all[['assigned_count', - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'mentioned_count', - 'referenced_count', - 'closed_count', - 'head_ref_force_pushed_count', - 'merged_count', - 'milestoned_count', - 'unlabeled_count', - 'head_ref_deleted_count', - 'comment_count', - 'commit_count', - 'file_count', - 'lines_added', - 'lines_removed' - ]] = pr_all[['assigned_count', - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'mentioned_count', - 'referenced_count', - 'closed_count', - 'head_ref_force_pushed_count', - 'merged_count', - 'milestoned_count', - 'unlabeled_count', - 'head_ref_deleted_count', - 'comment_count', - 'commit_count', - 'file_count', - 'lines_added', - 'lines_removed' - ]].astype(float) - # Change years to int so that doesn't display as 2019.0 for example - pr_all[['created_year', 'closed_year']] = pr_all[['created_year', 'closed_year']].fillna(-1).astype(int).astype( - str) - - start_date = pd.to_datetime(start_date) - # end_date = pd.to_datetime('2020-02-01 09:00:00') - end_date = pd.to_datetime(end_date) - pr_all = pr_all[(pr_all['pr_created_at'] > start_date) & (pr_all['pr_closed_at'] < end_date)] - - pr_all['created_year'] = pr_all['created_year'].map(int) - pr_all['created_month'] = pr_all['created_month'].map(int) - pr_all['created_month'] = pr_all['created_month'].map(lambda x: '{0:0>2}'.format(x)) - 
pr_all['created_yearmonth'] = pd.to_datetime( - pr_all['created_year'].map(str) + '-' + pr_all['created_month'].map(str) + '-01') - - # getting the number of days of (today - created at) for the PRs that are still open - # and putting this in the days_to_close column - - # get timedeltas of creation time to todays date/time - days_to_close_open_pr = datetime.datetime.now() - pr_all.loc[pr_all['pr_src_state'] == 'open']['pr_created_at'] - - # get num days from above timedelta - days_to_close_open_pr = days_to_close_open_pr.apply(lambda x: x.days).astype(int) - - # for only OPEN pr's, set the days_to_close column equal to above dataframe - pr_all.loc[pr_all['pr_src_state'] == 'open'] = pr_all.loc[pr_all['pr_src_state'] == 'open'].assign( - days_to_close=days_to_close_open_pr) - - pr_all.loc[pr_all['pr_src_state'] == 'open'].head() - - # initiate column by setting all null datetimes - pr_all['closed_yearmonth'] = pd.to_datetime(np.nan) - - # Fill column with prettified string of year/month closed that looks like: 2019-07-01 - pr_all.loc[pr_all['pr_src_state'] == 'closed'] = pr_all.loc[pr_all['pr_src_state'] == 'closed'].assign( - closed_yearmonth=pd.to_datetime(pr_all.loc[pr_all['pr_src_state'] == 'closed']['closed_year'].astype(int - ).map( - str) + '-' + pr_all.loc[pr_all['pr_src_state'] == 'closed']['closed_month'].astype(int).map( - str) + '-01')) - - """ Merged flag """ - if 'pr_merged_at' in pr_all.columns.values: - pr_all['pr_merged_at'] = pr_all['pr_merged_at'].fillna(0) - pr_all['merged_flag'] = 'Not Merged / Rejected' - pr_all['merged_flag'].loc[pr_all['pr_merged_at'] != 0] = 'Merged / Accepted' - pr_all['merged_flag'].loc[pr_all['pr_src_state'] == 'open'] = 'Still Open' - del pr_all['pr_merged_at'] - - # Isolate the different state PRs for now - pr_open = pr_all.loc[pr_all['pr_src_state'] == 'open'] - pr_closed = pr_all.loc[pr_all['pr_src_state'] == 'closed'] - pr_merged = pr_all.loc[pr_all['merged_flag'] == 'Merged / Accepted'] - pr_not_merged = 
def remove_outliers(input_df, field, num_outliers_repo_map):
    """Drop a per-repository count of the largest values of `field`.

    -- input_df: dataframe containing a 'repo_name' column and numeric `field`
    -- field: name of the numeric column whose largest values are outliers
    -- num_outliers_repo_map: dict of repo_name -> number of rows to drop

    Returns a copy of `input_df` with the outlier rows removed.
    """
    df_no_outliers = input_df.copy()
    for repo_name, num_outliers in num_outliers_repo_map.items():
        # nlargest yields the index labels of the `num_outliers` biggest values
        indices_to_drop = input_df.loc[input_df['repo_name'] == repo_name].nlargest(num_outliers, field).index
        df_no_outliers = df_no_outliers.drop(index=indices_to_drop)
    return df_no_outliers

def remove_outliers_by_standard_deviation(input_df, column):
    '''Takes a dataframe and a numeric column name.
    Then removes all rows that are more than 3 standard deviations above the mean.

    Returns a df without outliers, the # of outliers removed, outlier cutoff value'''

    # rows more than 3 standard deviations above the mean are treated as outliers
    outlier_cutoff = input_df[column].mean() + (3 * input_df[column].std())
    outlier_mask = input_df[column] > outlier_cutoff

    # determine number of outliers
    outliers_removed = len(input_df.loc[outlier_mask])

    df_no_outliers = input_df.loc[~outlier_mask]

    return df_no_outliers, outliers_removed, outlier_cutoff

def hex_to_RGB(hex):
    ''' "#FFFFFF" -> [255,255,255] '''
    # Pass 16 to the integer function for change of base
    return [int(hex[i:i + 2], 16) for i in range(1, 6, 2)]

def color_dict(gradient):
    ''' Takes in a list of RGB sub-lists and returns dictionary of
    colors in RGB and hex form for use in a graphing function
    defined later on '''
    return {"hex": [RGB_to_hex(RGB) for RGB in gradient],
            "r": [RGB[0] for RGB in gradient],
            "g": [RGB[1] for RGB in gradient],
            "b": [RGB[2] for RGB in gradient]}

def RGB_to_hex(RGB):
    ''' [255,255,255] -> "#FFFFFF" '''
    # Components need to be integers for hex to make sense
    RGB = [int(x) for x in RGB]
    return "#" + "".join(["0{0:x}".format(v) if v < 16 else
                          "{0:x}".format(v) for v in RGB])

def linear_gradient(start_hex, finish_hex="#FFFFFF", n=10):
    ''' returns a gradient list of (n) colors between
    two hex colors. start_hex and finish_hex
    should be the full six-digit color string,
    including the number sign ("#FFFFFF") '''
    # Starting and ending colors in RGB form
    s = hex_to_RGB(start_hex)
    f = hex_to_RGB(finish_hex)
    # Initialize a list of the output colors with the starting color
    RGB_list = [s]
    # Calculate a color at each evenly spaced value of t from 1 to n
    for t in range(1, n):
        # Interpolate RGB vector for color at the current value of t
        curr_vector = [
            int(s[j] + (float(t) / (n - 1)) * (f[j] - s[j]))
            for j in range(3)
        ]
        # Add it to our list of output colors
        RGB_list.append(curr_vector)

    return color_dict(RGB_list)

# dict of df types, and their location in the tuple that pull_request_data_collection returns
def get_df_tuple_locations():
    return {"pr_all": 0, "pr_open": 1, "pr_closed": 2, "pr_merged": 3, "pr_not_merged": 4, "pr_slow20_all": 5,
            "pr_slow20_open": 6, "pr_slow20_closed": 7, "pr_slow20_merged": 8, "pr_slow20_not_merged": 9}

def add_caption_to_plot(caption_plot, caption):
    """Attach `caption` as a screen-positioned label to a bokeh plot and return the plot."""

    caption_plot.add_layout(Label(
        x=0,  # Change to shift caption left or right
        y=160,
        x_units='screen',
        y_units='screen',
        text='{}'.format(caption),
        text_font='times',  # Use same font as paper
        text_font_size='15pt',
        render_mode='css'
    ))
    caption_plot.outline_line_color = None

    return caption_plot

def remove_rows_with_null_values(df, not_null_columns=[]):
    """Remove null data from pandas df

    Parameters
    -- df
        description: the dataframe that will be modified
        type: Pandas Dataframe

    -- not_null_columns
        description: columns that are searched for NULL values
        type: list
        default: [] (means all columns will be checked for NULL values)
        IMPORTANT: if an empty list is passed or nothing is passed it will check all columns for NULL values

    Return Value
        -- Modified Pandas Dataframe
    """

    if len(not_null_columns) == 0:
        not_null_columns = df.columns.to_list()

    total_rows_removed = 0
    for col in not_null_columns:
        rows_removed = len(df.loc[df[col].isnull()])

        if rows_removed > 0:
            print(f"{rows_removed} rows have been removed because of null values in column {col}")
            total_rows_removed += rows_removed

        # FIX: the original used `df.loc[df[col].isnull() is False]`; `is False`
        # compares the Series object's identity to False (always False), so .loc
        # raised instead of filtering. Keep only rows where the column is not null.
        df = df.loc[df[col].notnull()]

    if total_rows_removed > 0:
        print(f"\nTotal rows removed because of null data: {total_rows_removed}")
    else:
        print("No null data found")

    return df

def get_needed_columns(df, list_of_columns):
    """Get only a specific list of columns from a Pandas Dataframe

    Parameters
    -- df
        description: the dataframe that will be modified
        type: Pandas Dataframe

    -- list_of_columns
        description: columns that will be kept in dataframe
        type: list

    Return Value
        -- Modified Pandas Dataframe
    """
    return df[list_of_columns]

def filter_data(df, needed_columns, not_null_columns=[]):
    """Filters out the unneeded rows in the df, and removes NULL data from df

    Parameters
    -- df
        description: the dataframe that will be modified
        type: Pandas Dataframe

    -- needed_columns
        description: the columns to keep in the dataframe

    -- not_null_columns
        description: columns that will be searched for NULL data,
            if NULL values are found those rows will be removed
        default: [] (means all columns in needed_columns list will be checked for NULL values)
        IMPORTANT: if an empty list is passed or nothing is passed it will check
            all columns in needed_columns list for NULL values

    Return Value
        -- Modified Pandas Dataframe
    """

    if all(x in needed_columns for x in not_null_columns):
        df = get_needed_columns(df, needed_columns)
        # Use the pandas method bc the other method was erroring on boolean index.
        # IM - 9/23/22
        df = df.dropna(subset=not_null_columns)
        return df
    else:
        print("Developer error, not null columns should be a subset of needed columns")
        return df

def get_repo_id_start_date_and_end_date():

    """ Gets the repo_id, start_date, and end_date from the GET requests array

    :return: repo_id - id of the repo data is being retrieved for
    :return: start_date - earliest time on visualization. Defaults to the January 1st of last year
    :return: end_date - latest time on visualization. Defaults to current date
    """

    now = datetime.datetime.now()

    repo_id = request.args.get('repo_id')
    start_date = str(request.args.get('start_date', "{}-01-01".format(now.year - 1)))
    # FIX: zero-pad month/day so the lexicographic `start_date < end_date` check
    # below behaves as a date comparison (unpadded "2025-1-5" sorts before
    # "2025-01-06" as text, mis-ordering valid ranges).
    end_date = str(request.args.get('end_date', "{}-{:02d}-{:02d}".format(now.year, now.month, now.day)))

    if repo_id:

        if start_date < end_date:
            return int(repo_id), start_date, end_date, None
        else:

            error = {
                "message": "Invalid end_date. end_date is before the start_date",
                "status_code": 400
            }

            return int(repo_id), None, None, error

    else:
        error = {
            "message": "repo_id not specified. Use this endpoint to get a list of available repos: http:///api/unstable/repos",
            "status_code": 400
        }
        return None, None, None, error
@app.route('/{}/pull_request_reports/average_commits_per_PR/'.format(AUGUR_API_VERSION), methods=["GET"])
def average_commits_per_PR():
    """Render (PNG or bokeh JSON) the average commit count per pull request,
    grouped by year or month and split by merged/not-merged status."""

    repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date()

    if error:
        return Response(response=error["message"],
                        mimetype='application/json',
                        status=error["status_code"])

    group_by = str(request.args.get('group_by', "month"))
    return_json = request.args.get('return_json', "false")

    df_type = get_df_tuple_locations()

    df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date)

    y_axis = 'num_commits'
    group_by_bars = 'merged_flag'
    description = 'All'

    # gets pr_all data and keeps only the needed columns
    input_df = df_tuple[df_type["pr_all"]]
    needed_columns = ['repo_id', 'repo_name', 'closed_year', 'closed_yearmonth', group_by_bars, 'commit_count']
    input_df = filter_data(input_df, needed_columns)

    if len(input_df) == 0:
        return Response(response="There is no data for this repo, in the database you are accessing",
                        mimetype='application/json',
                        status=200)

    repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']}

    driver_df = input_df.copy()  # deep copy input data so we do not change the external dataframe

    # Change closed year to int so that doesn't display as 2019.0 for example
    driver_df['closed_year'] = driver_df['closed_year'].astype(int).astype(str)

    # defaults to year
    x_axis = 'closed_year'
    x_groups = sorted(list(driver_df[x_axis].unique()))

    if group_by == 'month':
        x_axis = "closed_yearmonth"
        x_groups = np.unique(np.datetime_as_string(input_df[x_axis], unit='M'))

    # inner groups on x_axis are merged and not_merged
    groups = list(driver_df[group_by_bars].unique())

    # setup color palette; Plasma has no 1- or 2-color entry, fall back to two colors
    try:
        colors = mpl['Plasma'][len(groups)]
    except KeyError:
        colors = [mpl['Plasma'][3][0]] + [mpl['Plasma'][3][1]]

    merged_avg_values = list(driver_df.loc[driver_df[group_by_bars] == 'Merged / Accepted'].groupby([x_axis],
        as_index=False).mean().round(1)['commit_count'])
    not_merged_avg_values = list(driver_df.loc[driver_df[group_by_bars] == 'Not Merged / Rejected'].groupby([x_axis],
        as_index=False).mean().round(1)['commit_count'])

    # Setup data in format for grouped bar chart
    data = {
        'years': x_groups,
        'Merged / Accepted': merged_avg_values,
        'Not Merged / Rejected': not_merged_avg_values,
    }

    x = [(year, pr_state) for year in x_groups for pr_state in groups]
    counts = sum(zip(data['Merged / Accepted'], data['Not Merged / Rejected']), ())

    source = ColumnDataSource(data=dict(x=x, counts=counts))

    title_beginning = '{}: '.format(repo_dict[repo_id])
    title = "{}Average Commit Counts Per Year for {} Pull Requests".format(title_beginning, description)

    plot_width = len(x_groups) * 300
    title_text_font_size = 16

    # widen the plot when the title would overflow it
    if (len(title) * title_text_font_size / 2) > plot_width:
        plot_width = int(len(title) * title_text_font_size / 2) + 40

    p = figure(x_range=FactorRange(*x), plot_height=450, plot_width=plot_width, title=title,
               y_range=(0, max(merged_avg_values + not_merged_avg_values) * 1.15), toolbar_location=None)

    # Vertical bar glyph
    p.vbar(x='x', top='counts', width=0.9, source=source, line_color="white",
           fill_color=factor_cmap('x', palette=colors, factors=groups, start=1, end=2))

    # Data label
    labels = LabelSet(x='x', y='counts', text='counts',
                      text_font_size="12pt", text_color="black",
                      source=source, text_align='center')
    p.add_layout(labels)

    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xaxis.major_label_orientation = 1
    p.xgrid.grid_line_color = None

    p.yaxis.axis_label = 'Average Commits / Pull Request'
    p.xaxis.axis_label = 'Year Closed'

    p.title.align = "center"
    p.title.text_font_size = "{}px".format(title_text_font_size)

    p.xaxis.axis_label_text_font_size = "16px"
    p.xaxis.major_label_text_font_size = "15px"

    p.yaxis.axis_label_text_font_size = "15px"
    p.yaxis.major_label_text_font_size = "15px"

    plot = p

    p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0))
    caption = "This graph shows the average commits per pull requests over an entire year," \
              " for merged and not merged pull requests."
    p = add_caption_to_plot(p, caption)

    caption_plot = p

    grid = gridplot([[plot], [caption_plot]])

    if return_json == "true":
        var = Response(response=json.dumps(json_item(grid, "average_commits_per_PR")),
                       mimetype='application/json',
                       status=200)

        # FIX: header name was misspelled "Access-Control-Allow-Orgin", so
        # browsers ignored it and cross-origin requests failed
        var.headers["Access-Control-Allow-Origin"] = "*"

        return var

    filename = export_png(grid, timeout=180)

    return send_file(filename)

@app.route('/{}/pull_request_reports/average_comments_per_PR/'.format(AUGUR_API_VERSION), methods=["GET"])
def average_comments_per_PR():
    """Render (PNG or bokeh JSON) the mean comment count for closed pull
    requests per year, split by merged/not-merged status."""

    repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date()

    if error:
        return Response(response=error["message"],
                        mimetype='application/json',
                        status=error["status_code"])

    return_json = request.args.get('return_json', "false")

    df_type = get_df_tuple_locations()

    df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date)

    group_by = 'merged_flag'
    x_axis = 'comment_count'
    description = "All Closed"
    y_axis = 'closed_year'

    # gets pr_closed data and keeps only the needed columns
    input_df = df_tuple[df_type["pr_closed"]]
    needed_columns = ['repo_id', 'repo_name', y_axis, group_by, x_axis]
    input_df = filter_data(input_df, needed_columns)

    if len(input_df) == 0:
        return Response(response="There is no data for this repo, in the database you are accessing",
                        mimetype='application/json',
                        status=200)

    repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']}

    driver_df = input_df.copy()

    try:
        y_groups = sorted(list(driver_df[y_axis].unique()))
    except Exception:
        y_groups = [repo_id]

    groups = driver_df[group_by].unique()
    # Plasma has no 1- or 2-color entry, fall back to two colors
    try:
        colors = mpl['Plasma'][len(groups)]
    except KeyError:
        colors = [mpl['Plasma'][3][0]] + [mpl['Plasma'][3][1]]

    title_beginning = '{}: '.format(repo_dict[repo_id])
    plot_width = 650
    p = figure(y_range=y_groups, plot_height=450, plot_width=plot_width,
               title='{} {}'.format(title_beginning, "Mean Comments for {} Pull Requests".format(description)),
               toolbar_location=None)

    possible_maximums = []
    for y_value in y_groups:

        y_merged_data = driver_df.loc[
            (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Merged / Accepted')]
        y_not_merged_data = driver_df.loc[
            (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Not Merged / Rejected')]

        if len(y_merged_data) > 0:
            y_merged_data_mean = y_merged_data[x_axis].mean()

            if math.isnan(y_merged_data_mean):
                return Response(
                    response="There is no message data for this repo, in the database you are accessing",
                    mimetype='application/json', status=200)
            else:
                y_merged_data[x_axis + '_mean'] = y_merged_data_mean.round(1)
        else:
            y_merged_data[x_axis + '_mean'] = 0

        if len(y_not_merged_data) > 0:
            y_not_merged_data_mean = y_not_merged_data[x_axis].mean()

            if math.isnan(y_not_merged_data_mean):
                return Response(
                    response="There is no message data for this repo, in the database you are accessing",
                    mimetype='application/json', status=200)
            else:
                y_not_merged_data[x_axis + '_mean'] = y_not_merged_data_mean.round(1)
        else:
            y_not_merged_data[x_axis + '_mean'] = 0

        not_merged_source = ColumnDataSource(y_not_merged_data)
        merged_source = ColumnDataSource(y_merged_data)

        possible_maximums.append(max(y_not_merged_data[x_axis + '_mean']))
        possible_maximums.append(max(y_merged_data[x_axis + '_mean']))

        # mean comment count for merged (black bar)
        merged_comment_count_glyph = p.hbar(y=dodge(y_axis, -0.1, range=p.y_range), left=0, right=x_axis + '_mean',
                                            height=0.04 * len(driver_df[y_axis].unique()),
                                            source=merged_source,
                                            fill_color="black")
        # Data label
        labels = LabelSet(x=x_axis + '_mean', y=dodge(y_axis, -0.1, range=p.y_range), text=x_axis + '_mean',
                          y_offset=-8, x_offset=34,
                          text_font_size="12pt", text_color="black",
                          source=merged_source, text_align='center')
        p.add_layout(labels)

        # mean comment count for nonmerged (red bar)
        not_merged_comment_count_glyph = p.hbar(y=dodge(y_axis, 0.1, range=p.y_range), left=0,
                                                right=x_axis + '_mean',
                                                height=0.04 * len(driver_df[y_axis].unique()),
                                                source=not_merged_source,
                                                fill_color="#e84d60")
        # Data label
        labels = LabelSet(x=x_axis + '_mean', y=dodge(y_axis, 0.1, range=p.y_range), text=x_axis + '_mean',
                          y_offset=-8, x_offset=34,
                          text_font_size="12pt", text_color="#e84d60",
                          source=not_merged_source, text_align='center')
        p.add_layout(labels)

    p.ygrid.grid_line_color = None
    p.legend.location = "bottom_right"
    p.axis.minor_tick_line_color = None
    p.outline_line_color = None
    p.xaxis.axis_label = 'Average Comments / Pull Request'
    p.yaxis.axis_label = 'Repository' if y_axis == 'repo_name' else 'Year Closed' if y_axis == 'closed_year' else ''

    legend = Legend(
        items=[
            ("Merged Pull Request Mean Comment Count", [merged_comment_count_glyph]),
            ("Rejected Pull Request Mean Comment Count", [not_merged_comment_count_glyph])
        ],

        location='center',
        orientation='vertical',
        border_line_color="black"
    )
    p.add_layout(legend, "below")

    p.title.text_font_size = "16px"
    p.title.align = "center"

    p.xaxis.axis_label_text_font_size = "16px"
    p.xaxis.major_label_text_font_size = "16px"

    p.yaxis.axis_label_text_font_size = "16px"
    p.yaxis.major_label_text_font_size = "16px"

    p.x_range = Range1d(0, max(possible_maximums) * 1.15)

    plot = p

    p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0))
    caption = "This graph shows the average number of comments per merged or not merged pull request."

    p = add_caption_to_plot(p, caption)

    caption_plot = p

    grid = gridplot([[plot], [caption_plot]])

    if return_json == "true":
        var = Response(response=json.dumps(json_item(grid, "average_comments_per_PR")),
                       mimetype='application/json',
                       status=200)

        # FIX: header name was misspelled "Access-Control-Allow-Orgin"
        var.headers["Access-Control-Allow-Origin"] = "*"

        return var

    filename = export_png(grid, timeout=180)

    return send_file(filename)

@app.route('/{}/pull_request_reports/PR_counts_by_merged_status/'.format(AUGUR_API_VERSION),
           methods=["GET"])
def PR_counts_by_merged_status():
    """Render (PNG or bokeh JSON) counts of closed pull requests per year in
    four categories: all merged, all not merged, slowest-20% merged and
    slowest-20% not merged."""

    repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date()

    if error:
        return Response(response=error["message"],
                        mimetype='application/json',
                        status=error["status_code"])

    return_json = request.args.get('return_json', "false")

    x_axis = 'closed_year'
    description = 'All Closed'

    df_type = get_df_tuple_locations()

    df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date)

    # gets pr_closed data and keeps only the needed columns
    pr_closed = df_tuple[df_type["pr_closed"]]
    pr_closed_needed_columns = ['repo_id', 'repo_name', x_axis, 'merged_flag']
    pr_closed = filter_data(pr_closed, pr_closed_needed_columns)

    # gets pr_slow20_not_merged data and keeps only the needed columns
    pr_slow20_not_merged = df_tuple[df_type["pr_slow20_not_merged"]]
    pr_slow20_not_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'merged_flag']
    pr_slow20_not_merged = filter_data(pr_slow20_not_merged, pr_slow20_not_merged_needed_columns)

    # gets pr_slow20_merged data and keeps only the needed columns
    pr_slow20_merged = df_tuple[df_type["pr_slow20_merged"]]
    pr_slow20_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'merged_flag']
    pr_slow20_merged = filter_data(pr_slow20_merged, pr_slow20_merged_needed_columns)

    if len(pr_closed) == 0 or len(pr_slow20_not_merged) == 0 or len(pr_slow20_merged) == 0:
        return Response(response="There is no data for this repo, in the database you are accessing",
                        mimetype='application/json',
                        status=200)

    repo_dict = {repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']}

    # FIX: DataFrame.append was removed in pandas 2.0; concatenate instead
    data_dict = {'All': pr_closed,
                 'Slowest 20%': pd.concat([pr_slow20_not_merged, pr_slow20_merged], ignore_index=True)}

    colors = mpl['Plasma'][6]

    # both frames share the same year domain, so only the first is needed
    for data_desc, input_df in data_dict.items():
        x_groups = sorted(list(input_df[x_axis].astype(str).unique()))
        break

    plot_width = 315 * len(x_groups)

    if plot_width < 900:
        plot_width = 900
    title_beginning = repo_dict[repo_id]
    p = figure(x_range=x_groups, plot_height=350, plot_width=plot_width,
               title='{}: {}'.format(title_beginning,
                                     "Count of {} Pull Requests by Merged Status".format(description)),
               toolbar_location=None)

    dodge_amount = 0.12
    x_offset = 60

    all_totals = []
    for data_desc, input_df in data_dict.items():
        driver_df = input_df.copy()

        driver_df[x_axis] = driver_df[x_axis].astype(str)

        groups = sorted(list(driver_df['merged_flag'].unique()))

        driver_df = driver_df.loc[driver_df['repo_id'] == repo_id]

        len_merged = []
        zeros = []
        len_not_merged = []
        totals = []

        for x_group in x_groups:
            len_merged_entry = len(
                driver_df.loc[(driver_df['merged_flag'] == 'Merged / Accepted') & (driver_df[x_axis] == x_group)])
            totals += [len(driver_df.loc[(driver_df['merged_flag'] == 'Not Merged / Rejected') & (
                driver_df[x_axis] == x_group)]) + len_merged_entry]
            len_not_merged += [len(driver_df.loc[(driver_df['merged_flag'] == 'Not Merged / Rejected') & (
                driver_df[x_axis] == x_group)])]
            len_merged += [len_merged_entry]
            zeros.append(0)

        data = {'X': x_groups}
        for group in groups:
            data[group] = []
            for x_group in x_groups:
                data[group] += [
                    len(driver_df.loc[(driver_df['merged_flag'] == group) & (driver_df[x_axis] == x_group)])]

        data['len_merged'] = len_merged
        data['len_not_merged'] = len_not_merged
        data['totals'] = totals
        data['zeros'] = zeros

        if data_desc == "All":
            all_totals = totals

        source = ColumnDataSource(data)

        p.vbar_stack(groups, x=dodge('X', dodge_amount, range=p.x_range), width=0.2, source=source,
                     color=colors[1:3], legend_label=[f"{data_desc} " + "%s" % x for x in groups])

        # Data label for merged
        p.add_layout(
            LabelSet(x=dodge('X', dodge_amount, range=p.x_range), y='zeros', text='len_merged', y_offset=2,
                     x_offset=x_offset,
                     text_font_size="12pt", text_color=colors[1:3][0],
                     source=source, text_align='center')
        )
        if min(data['totals']) < 400:
            y_offset = 15
        else:
            y_offset = 0
        # Data label for not merged
        p.add_layout(
            LabelSet(x=dodge('X', dodge_amount, range=p.x_range), y='totals', text='len_not_merged',
                     y_offset=y_offset, x_offset=x_offset,
                     text_font_size="12pt", text_color=colors[1:3][1],
                     source=source, text_align='center')
        )
        # Data label for total
        p.add_layout(
            LabelSet(x=dodge('X', dodge_amount, range=p.x_range), y='totals', text='totals', y_offset=0, x_offset=0,
                     text_font_size="12pt", text_color='black',
                     source=source, text_align='center')
        )
        # mirror the dodge/colors/offsets for the second (Slowest 20%) pass
        dodge_amount *= -1
        colors = colors[::-1]
        x_offset *= -1

    p.y_range = Range1d(0, max(all_totals) * 1.4)

    p.xgrid.grid_line_color = None
    p.legend.location = "top_center"
    p.legend.orientation = "horizontal"
    p.axis.minor_tick_line_color = None
    p.outline_line_color = None
    p.yaxis.axis_label = 'Count of Pull Requests'
    p.xaxis.axis_label = 'Repository' if x_axis == 'repo_name' else 'Year Closed' if x_axis == 'closed_year' else ''

    p.title.align = "center"
    p.title.text_font_size = "16px"

    p.xaxis.axis_label_text_font_size = "16px"
    p.xaxis.major_label_text_font_size = "16px"

    p.yaxis.axis_label_text_font_size = "16px"
    p.yaxis.major_label_text_font_size = "16px"

    p.outline_line_color = None

    plot = p

    p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0))
    caption = "This graph shows the number of closed pull requests per year in " \
              "four different categories. These four categories are All Merged, All Not Merged," \
              " Slowest 20% Merged, and Slowest 20% Not Merged."
    p = add_caption_to_plot(p, caption)

    caption_plot = p

    grid = gridplot([[plot], [caption_plot]])

    if return_json == "true":
        var = Response(response=json.dumps(json_item(grid, "PR_counts_by_merged_status")),
                       mimetype='application/json',
                       status=200)

        # FIX: header name was misspelled "Access-Control-Allow-Orgin"
        var.headers["Access-Control-Allow-Origin"] = "*"

        return var

    filename = export_png(grid, timeout=180)

    return send_file(filename)
'.format(repo_dict[repo_id]) - plot_width = 950 - p = figure(toolbar_location=None, y_range=sorted(driver_df[y_axis].unique()), plot_width=plot_width, - plot_height=450, # 75*len(driver_df[y_axis].unique()), - title="{}Mean Response Times for Pull Requests {}".format(title_beginning, description)) - - first_response_glyphs = [] - last_response_glyphs = [] - merged_days_to_close_glyphs = [] - not_merged_days_to_close_glyphs = [] - - possible_maximums = [] - - # FIXME repo_set is not defined - # setup color pallete - try: - colors = Colorblind[len(repo_set)] - except: - colors = Colorblind[3] - - y_merged_data_list = [] - y_not_merged_data_list = [] - - # calculate data frist time to obtain the maximum and make sure there is message data - for y_value in driver_df[y_axis].unique(): - - y_merged_data = driver_df.loc[ - (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Merged / Accepted')] - y_not_merged_data = driver_df.loc[ - (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Not Merged / Rejected')] - - if len(y_merged_data) > 0: - - y_merged_data_first_response_mean = y_merged_data[time_unit + '_to_first_response'].mean() - y_merged_data_last_response_mean = y_merged_data[time_unit + '_to_last_response'].mean() - y_merged_data_to_close_mean = y_merged_data[time_unit + '_to_close'].mean() - - if (math.isnan(y_merged_data_first_response_mean) or math.isnan( - y_merged_data_last_response_mean) or math.isnan(y_merged_data_to_close_mean)): - return Response( - response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', status=200) - else: - y_merged_data[time_unit + '_to_first_response_mean'] = y_merged_data_first_response_mean.round(1) - y_merged_data[time_unit + '_to_last_response_mean'] = y_merged_data_last_response_mean.round(1) - y_merged_data[time_unit + '_to_close_mean'] = y_merged_data_to_close_mean.round(1) - else: - y_merged_data[time_unit + '_to_first_response_mean'] = 0.00 - 
y_merged_data[time_unit + '_to_last_response_mean'] = 0.00 - y_merged_data[time_unit + '_to_close_mean'] = 0.00 - - if len(y_not_merged_data) > 0: - - y_not_merged_data_first_response_mean = y_not_merged_data[time_unit + '_to_first_response'].mean() - y_not_merged_data_last_response_mean = y_not_merged_data[time_unit + '_to_last_response'].mean() - y_not_merged_data_to_close_mean = y_not_merged_data[time_unit + '_to_close'].mean() - - if (math.isnan(y_not_merged_data_first_response_mean) or math.isnan( - y_not_merged_data_last_response_mean) or math.isnan(y_not_merged_data_to_close_mean)): - return Response( - response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', status=200) - else: - y_not_merged_data[ - time_unit + '_to_first_response_mean'] = y_not_merged_data_first_response_mean.round(1) - y_not_merged_data[ - time_unit + '_to_last_response_mean'] = y_not_merged_data_last_response_mean.round(1) - y_not_merged_data[time_unit + '_to_close_mean'] = y_not_merged_data_to_close_mean.round(1) - else: - y_not_merged_data[time_unit + '_to_first_response_mean'] = 0.00 - y_not_merged_data[time_unit + '_to_last_response_mean'] = 0.00 - y_not_merged_data[time_unit + '_to_close_mean'] = 0.00 - - possible_maximums.append(max(y_merged_data[time_unit + '_to_close_mean'])) - possible_maximums.append(max(y_not_merged_data[time_unit + '_to_close_mean'])) - - maximum = max(possible_maximums) * 1.15 - ideal_difference = maximum * 0.064 - - y_merged_data_list.append(y_merged_data) - y_not_merged_data_list.append(y_not_merged_data) - - # loop through data and add it to the plot - for index in range(0, len(y_merged_data_list)): - - y_merged_data = y_merged_data_list[index] - y_not_merged_data = y_not_merged_data_list[index] - - not_merged_source = ColumnDataSource(y_not_merged_data) - merged_source = ColumnDataSource(y_merged_data) - - # mean PR length for merged - merged_days_to_close_glyph = p.hbar(y=dodge(y_axis, -0.1, 
range=p.y_range), left=0, - right=time_unit + '_to_close_mean', - height=0.04 * len(driver_df[y_axis].unique()), - source=merged_source, - fill_color="black") # ,legend_label="Mean Days to Close", - merged_days_to_close_glyphs.append(merged_days_to_close_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_close_mean', y=dodge(y_axis, -0.1, range=p.y_range), - text=time_unit + '_to_close_mean', y_offset=-8, x_offset=34, # 34 - text_font_size="12pt", text_color="black", - source=merged_source, text_align='center') - p.add_layout(labels) - - # mean PR length For nonmerged - not_merged_days_to_close_glyph = p.hbar(y=dodge(y_axis, 0.1, range=p.y_range), left=0, - right=time_unit + '_to_close_mean', - height=0.04 * len(driver_df[y_axis].unique()), - source=not_merged_source, - fill_color="#e84d60") # legend_label="Mean Days to Close", - not_merged_days_to_close_glyphs.append(not_merged_days_to_close_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_close_mean', y=dodge(y_axis, 0.1, range=p.y_range), - text=time_unit + '_to_close_mean', y_offset=-8, x_offset=44, - text_font_size="12pt", text_color="#e84d60", - source=not_merged_source, text_align='center') - p.add_layout(labels) - - # if the difference between two values is less than 6.4 percent move the second one to the right 30 pixels - if (max(y_merged_data[time_unit + '_to_last_response_mean']) - max( - y_merged_data[time_unit + '_to_first_response_mean'])) < ideal_difference: - merged_x_offset = 30 - else: - merged_x_offset = 0 - - # if the difference between two values is less than 6.4 percent move the second one to the right 30 pixels - if (max(y_not_merged_data[time_unit + '_to_last_response_mean']) - max( - y_not_merged_data[time_unit + '_to_first_response_mean'])) < ideal_difference: - not_merged_x_offset = 30 - else: - not_merged_x_offset = 0 - - # if there is only one bar set the y_offsets so the labels will not overlap the bars - if len(driver_df[y_axis].unique()) == 1: - merged_y_offset 
= -65 - not_merged_y_offset = 45 - else: - merged_y_offset = -45 - not_merged_y_offset = 25 - - # mean time to first response - glyph = Rect(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, -0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[0]) - first_response_glyph = p.add_glyph(merged_source, glyph) - first_response_glyphs.append(first_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_first_response_mean', x_offset=0, y_offset=merged_y_offset, # -60, - text_font_size="12pt", text_color=colors[0], - source=merged_source, text_align='center') - p.add_layout(labels) - - # for nonmerged - glyph = Rect(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, 0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[0]) - first_response_glyph = p.add_glyph(not_merged_source, glyph) - first_response_glyphs.append(first_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_first_response_mean', x_offset=0, y_offset=not_merged_y_offset, - # 40, - text_font_size="12pt", text_color=colors[0], - source=not_merged_source, text_align='center') - p.add_layout(labels) - - # mean time to last response - glyph = Rect(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, -0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[1]) - last_response_glyph = p.add_glyph(merged_source, glyph) - last_response_glyphs.append(last_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_last_response_mean', x_offset=merged_x_offset, - y_offset=merged_y_offset, # -60, - text_font_size="12pt", text_color=colors[1], - 
source=merged_source, text_align='center') - p.add_layout(labels) - - # for nonmerged - glyph = Rect(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, 0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[1]) - last_response_glyph = p.add_glyph(not_merged_source, glyph) - last_response_glyphs.append(last_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_last_response_mean', x_offset=not_merged_x_offset, - y_offset=not_merged_y_offset, # 40, - text_font_size="12pt", text_color=colors[1], - source=not_merged_source, text_align='center') - p.add_layout(labels) - - p.title.align = "center" - p.title.text_font_size = "16px" - - p.xaxis.axis_label = "Days to Close" - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - # adjust the starting point and ending point based on the maximum of maximum of the graph - p.x_range = Range1d(maximum / 30 * -1, maximum * 1.15) - - p.yaxis.axis_label = "Repository" if y_axis == 'repo_name' else 'Year Closed' if y_axis == 'closed_year' else '' - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - p.ygrid.grid_line_color = None - p.y_range.range_padding = 0.15 - - p.outline_line_color = None - p.toolbar.logo = None - p.toolbar_location = None - - def add_legend(location, orientation, side): - legend = Legend( - items=[ - ("Mean Days to First Response", first_response_glyphs), - ("Mean Days to Last Response", last_response_glyphs), - ("Merged Mean Days to Close", merged_days_to_close_glyphs), - ("Not Merged Mean Days to Close", not_merged_days_to_close_glyphs) - ], - - location=location, - orientation=orientation, - border_line_color="black" - # title='Example Title' - ) - p.add_layout(legend, side) - - # add_legend((150, 50), "horizontal", "center") - add_legend((10, 135), "vertical", "right") - - 
plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average number of days between comments for all closed pull requests per month " \ - "in four categories. These four categories are All Merged, All Not Merged, Slowest 20% Merged, " \ - "and Slowest 20% Not Merged." - p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "mean_response_times_for_PR")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/mean_days_between_PR_comments/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def mean_days_between_PR_comments(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - - time_unit = 'Days' - x_axis = 'closed_yearmonth' - y_axis = 'average_days_between_responses' - description = "All Closed" - line_group = 'merged_flag' - num_outliers_repo_map = {} - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - # gets pr_closed data - # selects only need columns (pr_closed_needed_columns) - # removes columns that cannot be NULL (pr_closed_not_null_columns) - pr_closed = df_tuple[df_type["pr_closed"]] - pr_closed_needed_columns = ['repo_id', 'repo_name', x_axis, 'average_time_between_responses', line_group] - pr_closed = filter_data(pr_closed, pr_closed_needed_columns) - - # gets pr_slow20_not_merged data - 
# selects only need columns (pr_slow20_not_merged_needed_columns) - # removes columns that cannot be NULL (pr_slow20_not_merged_not_null_columns) - pr_slow20_not_merged = df_tuple[df_type["pr_slow20_not_merged"]] - pr_slow20_not_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'average_time_between_responses', line_group] - pr_slow20_not_merged = filter_data(pr_slow20_not_merged, pr_slow20_not_merged_needed_columns) - - # gets pr_slow20_merged data - # selects only need columns (pr_slow20_not_merged_needed_columns) - # removes columns that cannot be NULL (pr_slow20_not_merged_not_null_columns) - pr_slow20_merged = df_tuple[df_type["pr_slow20_merged"]] - pr_slow20_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'average_time_between_responses', line_group] - pr_slow20_merged = filter_data(pr_slow20_merged, pr_slow20_merged_needed_columns) - - if len(pr_closed) == 0 or len(pr_slow20_not_merged) == 0 or len(pr_slow20_merged) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - try: - pr_closed['average_days_between_responses'] = pr_closed['average_time_between_responses'].map( - lambda x: x.days).astype(float) - pr_slow20_not_merged['average_days_between_responses'] = pr_slow20_not_merged[ - 'average_time_between_responses'].map(lambda x: x.days).astype(float) - pr_slow20_merged['average_days_between_responses'] = pr_slow20_merged['average_time_between_responses'].map( - lambda x: x.days).astype(float) - except: - return Response(response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']} - - data_dict = {'All': pr_closed, 'Slowest 20%': pr_slow20_not_merged.append(pr_slow20_merged, ignore_index=True)} - - plot_width = 950 - p1 = figure(x_axis_type="datetime", - title="{}: Mean {} Between 
Comments by Month Closed for {} Pull Requests".format(repo_dict[repo_id], time_unit, description), - plot_width=plot_width, x_range=(data_dict["All"][x_axis].min(), data_dict["All"][x_axis].max()), plot_height=500, - toolbar_location=None) - colors = Category20[10][6:] - color_index = 0 - - glyphs = [] - - possible_maximums = [] - for data_desc, input_df in data_dict.items(): - - driver_df = input_df.copy() - - driver_df = remove_outliers(driver_df, y_axis, num_outliers_repo_map) - - driver_df = driver_df.loc[driver_df['repo_id'] == repo_id] - index = 0 - - driver_df_mean = driver_df.groupby(['repo_id', line_group, x_axis], as_index=False).mean() - - title_ending = '' - if repo_id: - title_ending += ' for Repo: {}'.format(repo_id) - - for group_num, line_group_value in enumerate(driver_df[line_group].unique(), color_index): - glyphs.append(p1.line(driver_df_mean.loc[driver_df_mean[line_group] == line_group_value][x_axis], - driver_df_mean.loc[driver_df_mean[line_group] == line_group_value][y_axis], - color=colors[group_num], line_width=3)) - color_index += 1 - possible_maximums.append( - max(driver_df_mean.loc[driver_df_mean[line_group] == line_group_value][y_axis].dropna())) - for repo, num_outliers in num_outliers_repo_map.items(): - p1.add_layout( - Title(text="** {} outliers for {} were removed".format(num_outliers, repo), align="center"), - "below") - - p1.grid.grid_line_alpha = 0.3 - p1.xaxis.axis_label = 'Month Closed' - p1.xaxis.ticker.desired_num_ticks = 15 - p1.yaxis.axis_label = 'Mean {} Between Responses'.format(time_unit) - p1.legend.location = "top_left" - - legend = Legend( - items=[ - ("All Not Merged / Rejected", [glyphs[0]]), - ("All Merged / Accepted", [glyphs[1]]), - ("Slowest 20% Not Merged / Rejected", [glyphs[2]]), - ("Slowest 20% Merged / Accepted", [glyphs[3]]) - ], - - location='center_right', - orientation='vertical', - border_line_color="black" - ) - - p1.add_layout(legend, 'right') - - p1.title.text_font_size = "16px" - - 
p1.xaxis.axis_label_text_font_size = "16px" - p1.xaxis.major_label_text_font_size = "16px" - - p1.yaxis.axis_label_text_font_size = "16px" - p1.yaxis.major_label_text_font_size = "16px" - p1.xaxis.major_label_orientation = 45.0 - - p1.y_range = Range1d(0, max(possible_maximums) * 1.15) - - plot = p1 - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average number of days between comments for all" \ - " closed pull requests per month in four categories. These four categories" \ - " are All Merged, All Not Merged, Slowest 20% Merged, and Slowest 20% Not Merged." - p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "mean_days_between_PR_comments")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/PR_time_to_first_response/'.format(AUGUR_API_VERSION), methods=["GET"]) -def PR_time_to_first_response(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - remove_outliers = str(request.args.get('remove_outliers', "true")) - - x_axis = 'pr_closed_at' - y_axis = 'days_to_first_response' - description = 'All' - group_by = 'merged_flag' - legend_position = 'top_right' - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - pr_closed = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', 'repo_name', 
x_axis, group_by, y_axis] - pr_closed = filter_data(pr_closed, needed_columns) - - if len(pr_closed) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']} - - driver_df = pr_closed.copy() - - outliers_removed = 0 - - if remove_outliers == "true": - driver_df, outliers_removed, outlier_cutoff = remove_outliers_by_standard_deviation(driver_df, 'days_to_first_response') - - group_by_groups = sorted(driver_df[group_by].unique()) - - # setup color pallete - try: - # FIXME repo_set is not defined - colors = Colorblind[len(repo_set)] - except: - colors = Colorblind[3] - - title_beginning = '{}: '.format(repo_dict[repo_id]) - plot_width = 180 * 5 - p = figure(x_range=( - driver_df[x_axis].min() - datetime.timedelta(days=30), driver_df[x_axis].max() + datetime.timedelta(days=25)), - # (driver_df[y_axis].min(), driver_df[y_axis].max()), - toolbar_location=None, - title='{}Days to First Response for {} Closed Pull Requests'.format(title_beginning, description), - plot_width=plot_width, - plot_height=400, x_axis_type='datetime') - - for index, group_by_group in enumerate(group_by_groups): - p.scatter(x_axis, y_axis, color=colors[index], marker="square", - source=driver_df.loc[driver_df[group_by] == group_by_group], legend_label=group_by_group) - - if group_by_group == "Merged / Accepted": - merged_values = driver_df.loc[driver_df[group_by] == group_by_group][y_axis].dropna().values.tolist() - else: - not_merged_values = driver_df.loc[driver_df[group_by] == group_by_group][ - y_axis].dropna().values.tolist() - - values = not_merged_values + merged_values - - if outliers_removed > 0: - if repo_id: - p.add_layout(Title( - text="** Outliers cut off at {} days: {} outlier(s) for {} were removed **".format(outlier_cutoff, - outliers_removed, - repo_dict[ - repo_id]), - align="center"), "below") - 
else: - p.add_layout(Title( - text="** Outliers cut off at {} days: {} outlier(s) were removed **".format(outlier_cutoff, - outliers_removed), - align="center"), "below") - - p.xaxis.axis_label = 'Date Closed' if x_axis == 'pr_closed_at' else 'Date Created' if x_axis == 'pr_created_at' else 'Date' - p.yaxis.axis_label = 'Days to First Response' - p.legend.location = legend_position - - p.title.align = "center" - p.title.text_font_size = "16px" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - - if len(values) == 0: - return Response(response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - # determine y_max by finding the max of the values and scaling it up a small amoutn - y_max = max(values) * 1.015 - - p.y_range = Range1d(0, y_max) - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the days to first reponse for individual pull requests, either Merged or Not Merged." 
- p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "PR_time_to_first_response")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/average_PR_events_for_closed_PRs/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def average_PR_events_for_closed_PRs(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - include_comments = str(request.args.get('include_comments', True)) - - x_axis = 'closed_year' - facet = 'merged_flag' - columns = 2 - x_max = 1100 - y_axis = 'repo_name' - description = 'All Closed' - optional_comments = ['comment_count'] if include_comments else [] - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - pr_closed = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', 'repo_name', x_axis, 'assigned_count', - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'mentioned_count', - 'referenced_count', - 'closed_count', - 'head_ref_force_pushed_count', - 'merged_count', - 'milestoned_count', - 'unlabeled_count', - 'head_ref_deleted_count', facet] + optional_comments - pr_closed = filter_data(pr_closed, needed_columns) - - if len(pr_closed) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = 
{repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']} - - colors = linear_gradient('#f5f5dc', '#fff44f', 150)['hex'] - - driver_df = pr_closed.copy() - driver_df[x_axis] = driver_df[x_axis].astype(str) - - if facet == 'closed_year' or y_axis == 'closed_year': - driver_df['closed_year'] = driver_df['closed_year'].astype(int).astype(str) - - y_groups = [ - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'referenced_count', - 'closed_count', - # 'milestoned_count', - ] + optional_comments - - optional_group_comments = ['comment'] if include_comments else [] - # y_groups = ['subscribed', 'mentioned', 'labeled', 'review_requested', 'head_ref_force_pushed', - # 'referenced', 'closed', 'merged', 'unlabeled', 'head_ref_deleted', 'milestoned', 'assigned'] - # + optional_group_comments - - x_groups = sorted(list(driver_df[x_axis].unique())) - - grid_array = [] - grid_row = [] - - for index, facet_group in enumerate(sorted(driver_df[facet].unique())): - - facet_data = driver_df.loc[driver_df[facet] == facet_group] - # display(facet_data.sort_values('merged_count', ascending=False).head(50)) - driver_df_mean = facet_data.groupby(['repo_id', 'repo_name', x_axis], as_index=False).mean().round(1) - - # if a record is field in a record is Nan then it is not counted by count() so when it is not - # 2 meaning both rows have a value, there is not enough data - if (driver_df_mean['assigned_count'].count() != 2 or driver_df_mean[ - 'review_requested_count'].count() != 2 or driver_df_mean['labeled_count'].count() != 2 or - driver_df_mean['subscribed_count'].count() != 2 or driver_df_mean['mentioned_count'].count() != 2 or - driver_df_mean['referenced_count'].count() != 2 or - driver_df_mean['closed_count'].count() != 2 or driver_df_mean[ - 'head_ref_force_pushed_count'].count() != 2 or driver_df_mean['merged_count'].count() != 2 or - driver_df_mean['milestoned_count'].count() != 2 or driver_df_mean['unlabeled_count'].count() != 2 or - 
driver_df_mean['head_ref_deleted_count'].count() != 2 or - driver_df_mean['comment_count'].count() != 2): - return Response(response="There is not enough data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - # print(driver_df_mean.to_string()) - # data = {'Y' : y_groups} - # for group in y_groups: - # data[group] = driver_df_mean[group].tolist() - plot_width = 700 - p = figure(y_range=y_groups, plot_height=500, plot_width=plot_width, x_range=x_groups, - title='{}'.format(format(facet_group))) - - for y_group in y_groups: - driver_df_mean['field'] = y_group - source = ColumnDataSource(driver_df_mean) - mapper = LinearColorMapper(palette=colors, low=driver_df_mean[y_group].min(), - high=driver_df_mean[y_group].max()) - - p.rect(y='field', x=x_axis, width=1, height=1, source=source, - line_color=None, fill_color=transform(y_group, mapper)) - # Data label - labels = LabelSet(x=x_axis, y='field', text=y_group, y_offset=-8, - text_font_size="12pt", text_color='black', - source=source, text_align='center') - p.add_layout(labels) - - color_bar = ColorBar(color_mapper=mapper, location=(0, 0), - ticker=BasicTicker(desired_num_ticks=9), - formatter=PrintfTickFormatter(format="%d")) - # p.add_layout(color_bar, 'right') - - p.y_range.range_padding = 0.1 - p.ygrid.grid_line_color = None - - p.legend.location = "bottom_right" - p.axis.minor_tick_line_color = None - p.outline_line_color = None - - p.xaxis.axis_label = 'Year Closed' - p.yaxis.axis_label = 'Event Type' - - p.title.align = "center" - p.title.text_font_size = "15px" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - - grid_row.append(p) - if index % columns == columns - 1: - grid_array.append(grid_row) - grid_row = [] - grid = gridplot(grid_array) - - # create caption plot - caption_plot = figure(width=plot_width, height=200, 
margin=(0, 0, 0, 0)) - caption = "This graph shows the average count of several different event types for " \ - "closed pull requests per year. It spilits the pull requests into two categories, " \ - "Merged / Accepted, and Not Merged / Rejected, so the similarities and differences are clear." - - caption_plot.add_layout(Label(x=0, y=380, x_units='screen', y_units='screen', text='{}'.format(caption), - text_font='times', text_font_size='15pt', render_mode='css')) - - # caption_plot.outline_line_color = None - caption_plot.toolbar_location = None - - # create title plot - title_plot = figure(width=plot_width, height=50, margin=(0, 0, 0, 0)) - title = '{}: Average Pull Request Event Types for {} Pull Requests'.format(repo_dict[repo_id], description) - - title_plot.add_layout(Label(x=550, y=0, x_units='screen', y_units='screen', text='{}'.format(title), - text_font='times', text_font_size='17px', - text_font_style='bold', render_mode='css')) - - # title_plot.outline_line_color = None - title_plot.toolbar_location = None - - layout = column([title_plot, grid, caption_plot], sizing_mode='scale_width') - - if return_json == "true": - var = Response(response=json.dumps(json_item(layout, "average_PR_events_for_closed_PRs")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(layout, timeout=181) # , webdriver=selenium.webdriver.firefox.webdriver) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/Average_PR_duration/'.format(AUGUR_API_VERSION), methods=["GET"]) -def Average_PR_duration(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by = str(request.args.get('group_by', "month")) - 
return_json = request.args.get('return_json', "false") - remove_outliers = str(request.args.get('remove_outliers', "true")) - - x_axis = 'repo_name' - group_by = 'merged_flag' - y_axis = 'closed_yearmonth' - description = "All Closed" - heat_field = 'pr_duration_days' - columns = 2 - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - pr_closed = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', y_axis, group_by, x_axis, 'pr_closed_at', 'pr_created_at'] - pr_closed = filter_data(pr_closed, needed_columns) - - if len(pr_closed) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - pr_duration_frame = pr_closed.assign(pr_duration=(pr_closed['pr_closed_at'] - pr_closed['pr_created_at'])) - pr_duration_frame = pr_duration_frame.assign( - pr_duration_days=(pr_duration_frame['pr_duration'] / datetime.timedelta(minutes=1)) / 60 / 24) - - repo_dict = {repo_id: pr_duration_frame.loc[pr_duration_frame['repo_id'] == repo_id].iloc[0]['repo_name']} - - red_green_gradient = linear_gradient('#0080FF', '#DC143C', 150)['hex'] # 32CD32 - - driver_df = pr_duration_frame.copy() - - driver_df[y_axis] = driver_df[y_axis].astype(str) - - # add new group by + xaxis column - driver_df['grouped_x'] = driver_df[x_axis] + ' - ' + driver_df[group_by] - - driver_df_mean = driver_df.groupby(['grouped_x', y_axis], as_index=False).mean() - - colors = red_green_gradient - y_groups = driver_df_mean[y_axis].unique() - x_groups = sorted(driver_df[x_axis].unique()) - grouped_x_groups = sorted(driver_df_mean['grouped_x'].unique()) - - # defualt outliers removed to 0 - outliers_removed = 0 - - if remove_outliers == "true": - driver_df_mean, outliers_removed, outlier_cutoff = remove_outliers_by_standard_deviation(driver_df_mean, - heat_field) - - values = driver_df_mean[heat_field].values.tolist() - - 
heat_max = max(values) * 1.02 - - mapper = LinearColorMapper(palette=colors, low=driver_df_mean[heat_field].min(), - high=heat_max) # driver_df_mean[heat_field].max()) - - source = ColumnDataSource(driver_df_mean) - title_beginning = repo_dict[repo_id] + ':' - plot_width = 1100 - p = figure(plot_width=plot_width, plot_height=300, - title="{} Mean Duration (Days) {} Pull Requests".format(title_beginning, description), - y_range=grouped_x_groups[::-1], x_range=y_groups, - toolbar_location=None, tools="") # , x_axis_location="above") - - for x_group in x_groups: - outliers = driver_df_mean.loc[ - (driver_df_mean[heat_field] > heat_max) & (driver_df_mean['grouped_x'].str.contains(x_group))] - - if outliers_removed > 0: - p.add_layout(Title( - text="** Outliers capped at {} days: {} outlier(s) for {} were capped at {} **".format( - outlier_cutoff, outliers_removed, x_group, outlier_cutoff), align="center"), "below") - - p.rect(x=y_axis, y='grouped_x', width=1, height=1, source=source, - line_color=None, fill_color=transform(heat_field, mapper)) - - color_bar = ColorBar(color_mapper=mapper, location=(0, 0), - ticker=BasicTicker(desired_num_ticks=9), - formatter=PrintfTickFormatter(format="%d")) - - p.add_layout(color_bar, 'right') - - p.title.align = "center" - p.title.text_font_size = "16px" - - p.axis.axis_line_color = None - p.axis.major_tick_line_color = None - p.axis.major_label_text_font_size = "11pt" - p.axis.major_label_standoff = 0 - p.xaxis.major_label_orientation = 1.0 - p.xaxis.axis_label = 'Month Closed' if y_axis[0:6] == 'closed' else 'Date Created' if y_axis[ - 0:7] == 'created' else 'Repository' if y_axis == 'repo_name' else '' - # p.yaxis.axis_label = 'Merged Status' - - p.title.text_font_size = "16px" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "14px" - - p.yaxis.major_label_text_font_size = "15px" - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the 
average duration of all closed pull requests. " \ - "Red represents a slow response relative to the others, while blue a light blue " \ - "represents a fast response relative to the others. Blank cells represents months " \ - "without pull requests." - p = add_caption_to_plot(p, caption) - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "Average_PR_duration")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - # newt = get_screenshot_as_png(grid, timeout=180, webdriver=selenium.webdriver.firefox.webdriver) - # filename = export_png(grid, timeout=180, webdriver=selenium.webdriver.firefox.webdriver) - filename = export_png(grid, timeout=180) - - # return sendfile(newt) - return send_file(filename) From 2e60ffb4fc331ab0f603628984ed784f53d584b3 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 30 Sep 2025 19:42:22 -0400 Subject: [PATCH 022/105] remove reports definition Signed-off-by: Adrian Edwards --- augur/api/view/init.py | 57 ------------------------------------------ 1 file changed, 57 deletions(-) diff --git a/augur/api/view/init.py b/augur/api/view/init.py index 869b383a62..2a4ce44191 100644 --- a/augur/api/view/init.py +++ b/augur/api/view/init.py @@ -33,63 +33,6 @@ def write_settings(current_settings): with open(configFile, 'w') as file: yaml.dump(current_settings, file) -# default reports definition -reports = { - "pull_request_reports":[ - { - "url":"average_commits_per_PR", - "description":"Average commits per pull request" - }, - { - "url":"average_comments_per_PR", - "description":"Average comments per pull request" - }, - { - "url":"PR_counts_by_merged_status", - "description":"Pull request counts by merged status" - }, - { - "url":"mean_response_times_for_PR", - 
"description":"Mean response times for pull requests" - }, - { - "url":"mean_days_between_PR_comments", - "description":"Mean days between pull request comments" - }, - { - "url":"PR_time_to_first_response", - "description":"Pull request time until first response" - }, - { - "url":"average_PR_events_for_closed_PRs", - "description":"Average pull request events for closed pull requests" - }, - { - "url":"Average_PR_duration", - "description":"Average pull request duration" - } - ], - "contributor_reports":[ - { - "url":"new_contributors_bar", - "description":"New contributors bar graph" - }, - { - "url":"returning_contributors_pie_chart", - "description":"Returning contributors pie chart" - } - ], - "contributor_reports_stacked":[ - { - "url":"new_contributors_stacked_bar", - "description":"New contributors stacked bar chart" - }, - { - "url":"returning_contributors_stacked_bar", - "description":"Returning contributors stacked bar chart" - } - ] -} # Initialize logging def init_logging(): From a28c7b4187cfca644a926cbb59c14a5749ad4d9d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 30 Sep 2025 19:43:02 -0400 Subject: [PATCH 023/105] additional reports related things Signed-off-by: Adrian Edwards --- augur/api/view/init.py | 3 - augur/api/view/utils.py | 80 ------------------- .../create-a-metric/api-development.rst | 4 - 3 files changed, 87 deletions(-) diff --git a/augur/api/view/init.py b/augur/api/view/init.py index 2a4ce44191..1737131352 100644 --- a/augur/api/view/init.py +++ b/augur/api/view/init.py @@ -19,9 +19,6 @@ def init_settings(): settings["cache_expiry"] = 604800 settings["serving"] = "http://augur.chaoss.io/api/unstable" settings["pagination_offset"] = 25 - # Put reports.yml in the same directory as the config file - config_dir = configFile.parent - settings["reports"] = os.path.join(config_dir, "reports.yml") settings["session_key"] = secrets.token_hex() def write_settings(current_settings): diff --git a/augur/api/view/utils.py 
b/augur/api/view/utils.py index aae5140cd7..dbfdd1b121 100644 --- a/augur/api/view/utils.py +++ b/augur/api/view/utils.py @@ -70,34 +70,6 @@ def getSetting(key, section = "View"): #version_check(settings) -""" ---------------------------------------------------------------- -""" -def loadReports(): - global reports - try: - with open(getSetting("reports")) as file: - reports = yaml.load(file, Loader=yaml.FullLoader) - id = -1 - for report in reports: - for image in reports[report]: - image['id'] = id = id + 1 - return True - except Exception as err: - logger.error(f"An exception occurred reading reports endpoints from [{getSetting('reports')}]:") - logger.error(err) - try: - with open(getSetting("reports"), 'w') as file: - logger.info("Attempting to generate default reports.yml") - yaml.dump(reports, file) - logger.info("Default reports file successfully generated.") - except Exception as ioErr: - logger.error("Error creating default report configuration:") - logger.error(ioErr) - return False - -if not loadReports(): - loadReports() - cache_files_requested = [] """ ---------------------------------------------------------------- @@ -160,58 +132,6 @@ def download(url, cmanager, filename, image_cache, image_id, repo_id = None): logger.error("An exception occurred writing a cache file to disk") logger.error(err) -""" ---------------------------------------------------------------- -""" -def requestReports(repo_id): - # If this request has already been fulfilled, no need to process it again - if(repo_id in report_requests.keys()): - return - - # initialize a new request entry to hold the resulting data - report_requests[repo_id] = {} - report_requests[repo_id]['complete'] = False - - host = getSetting("host", "Server") - port = getSetting("port", "Server") - - """ ---------- - If the report definition could not be loaded, we cannot determine what - files to request from the backend to compose the report. 
Returning here - causes the completion status of the request to be False, which will - display an error message when sent to the frontend. - """ - if reports is None: - return - - threadPools = [] - reportImages = {} - for report in reports: - # Reports is a dictionary of lists, so we get the size of each list - size = len(reports[report]) - - # Set up various threading components to manage image downloading - connection_mgr = urllib3.PoolManager(maxsize=size) - thread_pool = ThreadPoolExecutor(size) - threadPools.append(thread_pool) - - for image in reports[report]: - # Where should the downloaded image be stored (in cache) - filename = toCacheFilename(f"{image['url']}?repo_id={repo_id}") - # Where are we downloading the image from - image_url = f"{host}:{port}" + url_for(image['url'], repo_id = repo_id) - # f"{getSetting('serving')}/{image['url']}?repo_id={repo_id}" - - # Add a request for this image to the thread pool using the download function - thread_pool.submit(download, image_url, connection_mgr, filename, reportImages, image['id'], repo_id) - - # Wait for all connections to resolve, then clean up - for thread_pool in threadPools: - thread_pool.shutdown() - - report_requests[repo_id]['images'] = reportImages - - # Remove the request from the queue when completed - report_requests[repo_id]['complete'] = True """ ---------------------------------------------------------------- renderRepos: diff --git a/docs/source/development-guide/create-a-metric/api-development.rst b/docs/source/development-guide/create-a-metric/api-development.rst index 834b42e8e0..05e1ebb977 100644 --- a/docs/source/development-guide/create-a-metric/api-development.rst +++ b/docs/source/development-guide/create-a-metric/api-development.rst @@ -133,10 +133,6 @@ There is also, generally, a block in a standard metric for pulling data by a rep 'begin_date': begin_date, 'end_date': end_date}) return results -Existing Visualization Metrics Files: --------------------------------------------- 
-1. augur/routes/contributor_reports.py -2. augur/routes/pull_request_reports.py Existing Metrics Files: -------------------------------------------- From 148680b3a581e29199a3277082cc41ba9dacf7ac Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 7 Oct 2025 08:15:33 +0100 Subject: [PATCH 024/105] basic implementation Signed-off-by: Adrian Edwards --- augur/application/config.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index ee3c33dc8b..ab8bb4e93d 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -5,6 +5,7 @@ import os from augur.application.db.models import Config from augur.application.db.util import execute_session_query, convert_type_of_value +from pathlib import Path def get_development_flag_from_config(): @@ -122,7 +123,11 @@ def __init__(self, logger, session: DatabaseSession): self.logger = logger self.accepted_types = ["str", "bool", "int", "float", "NoneType"] - self.default_config = default_config + config_path = Path("./augur.json") + if config_path.exists(): + self.default_config = json.loads(config_path.read_text(encoding="UTF-8")) + else: + self.default_config = default_config def get_section(self, section_name) -> dict: """Get a section of data from the config. 
From d13aca8cbed396ff26b0f021c0761e675d673e93 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 15 Oct 2025 20:23:47 +0100 Subject: [PATCH 025/105] introduce a config datadir item and use that so that config can be in a standard location going forward Signed-off-by: Adrian Edwards --- augur/application/config.py | 3 ++- docker-compose.yml | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index ab8bb4e93d..2cc6f65cdb 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -123,7 +123,8 @@ def __init__(self, logger, session: DatabaseSession): self.logger = logger self.accepted_types = ["str", "bool", "int", "float", "NoneType"] - config_path = Path("./augur.json") + config_dir = Path(os.getenv("CONFIG_DATADIR", "./")) + config_path = config_dir.joinpath("augur.json") if config_path.exists(): self.default_config = json.loads(config_path.read_text(encoding="UTF-8")) else: diff --git a/docker-compose.yml b/docker-compose.yml index b32f0a1696..f0ef41015b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -73,6 +73,7 @@ services: - REDIS_CONN_STRING=redis://redis:6379 - RABBITMQ_CONN_STRING=amqp://${AUGUR_RABBITMQ_USERNAME:-augur}:${AUGUR_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${AUGUR_RABBITMQ_VHOST:-augur_vhost} - CONFIG_LOCATION=/config/config.yml + - CONFIG_DATADIR=/config - CACHE_DATADIR=/cache - CACHE_LOCKDIR=/cache - CELERYBEAT_SCHEDULE_DB=/tmp/celerybeat-schedule.db From 4887e3ebd5d1483e1febdbee9940cd8367396048 Mon Sep 17 00:00:00 2001 From: Sajal-Kulshreshtha Date: Sat, 11 Oct 2025 17:55:54 +0530 Subject: [PATCH 026/105] Centralize versioning Signed-off-by: Sajal-Kulshreshtha --- .github/workflows/build_docker.yml | 11 +++++++++++ docker/backend/Dockerfile | 4 +++- docker/database/Dockerfile | 4 +++- docker/keyman/Dockerfile | 4 +++- docker/rabbitmq/Dockerfile | 4 +++- scripts/ci/get_version.py | 7 +++++++ 6 files changed, 30 insertions(+), 4 
deletions(-) create mode 100644 scripts/ci/get_version.py diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index e7fa4b262c..7828019514 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -58,6 +58,13 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 + - name: Extract project version + id: version + run: | + VERSION=$(python -c "import re; exec(open('metadata.py').read()); print(__version__)") + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "Using version: $VERSION" + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 id: setup-buildx @@ -67,6 +74,7 @@ jobs: with: context: . file: ./docker/database/Dockerfile + build-args: VERSION=${{ steps.version.outputs.version }} platforms: linux/amd64 tags: ghcr.io/${{ github.repository_owner }}/augur_database:test cache-from: type=gha,scope=container-database @@ -78,6 +86,7 @@ jobs: with: context: . file: ./docker/keyman/Dockerfile + build-args: VERSION=${{ steps.version.outputs.version }} platforms: linux/amd64 tags: ghcr.io/${{ github.repository_owner }}/augur_keyman:test cache-from: type=gha,scope=container-keyman @@ -89,6 +98,7 @@ jobs: with: context: . file: ./docker/rabbitmq/Dockerfile + build-args: VERSION=${{ steps.version.outputs.version }} platforms: linux/amd64 tags: ghcr.io/${{ github.repository_owner }}/augur_rabbitmq:test cache-from: type=gha,scope=container-rabbitmq @@ -100,6 +110,7 @@ jobs: with: context: . 
file: ./docker/backend/Dockerfile + build-args: VERSION=${{ steps.version.outputs.version }} platforms: linux/amd64 tags: ghcr.io/${{ github.repository_owner }}/augur_backend:test cache-from: type=gha,scope=container-backend diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 0a05daf848..5a8bfaaa3e 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -20,7 +20,9 @@ RUN go install github.com/ossf/scorecard/v5@v5.1.1 \ FROM python:3.11-slim-bullseye LABEL maintainer="outdoors@acm.org" -LABEL version="0.90.3" + +ARG VERSION +LABEL version=${VERSION} ENV DEBIAN_FRONTEND=noninteractive ENV PATH="/usr/bin/:/usr/local/bin:/usr/lib:${PATH}" diff --git a/docker/database/Dockerfile b/docker/database/Dockerfile index 6558fe44ec..e4393fe0b3 100644 --- a/docker/database/Dockerfile +++ b/docker/database/Dockerfile @@ -2,7 +2,9 @@ FROM postgres:16 LABEL maintainer="outdoors@acm.org" -LABEL version="0.90.3" + +ARG VERSION +LABEL version=${VERSION} ENV POSTGRES_DB="test" ENV POSTGRES_USER="augur" diff --git a/docker/keyman/Dockerfile b/docker/keyman/Dockerfile index 72c46ba225..3fe1996223 100644 --- a/docker/keyman/Dockerfile +++ b/docker/keyman/Dockerfile @@ -1,7 +1,9 @@ FROM python:3.11.12-alpine LABEL maintainer="outdoors@acm.org" -LABEL version="0.90.3" + +ARG VERSION +LABEL version=${VERSION} RUN pip install --no-cache-dir --upgrade pip diff --git a/docker/rabbitmq/Dockerfile b/docker/rabbitmq/Dockerfile index ad86dfebb7..387eb9ae77 100644 --- a/docker/rabbitmq/Dockerfile +++ b/docker/rabbitmq/Dockerfile @@ -1,7 +1,9 @@ FROM rabbitmq:3.12-management-alpine LABEL maintainer="574/augur@simplelogin.com" -LABEL version="0.90.0" + +ARG VERSION +LABEL version=${VERSION} ARG RABBIT_MQ_DEFAULT_USER=augur ARG RABBIT_MQ_DEFAULT_PASSWORD=password123 diff --git a/scripts/ci/get_version.py b/scripts/ci/get_version.py new file mode 100644 index 0000000000..e98d520602 --- /dev/null +++ b/scripts/ci/get_version.py @@ -0,0 +1,7 @@ +import sys +import 
os + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) +from metadata import __version__ + +print(__version__) From 3f2768df10052997709a3c757b4a1be07dd3bb00 Mon Sep 17 00:00:00 2001 From: Sajal-Kulshreshtha Date: Wed, 15 Oct 2025 01:36:59 +0530 Subject: [PATCH 027/105] docs: update release process to reflect centralized version management Signed-off-by: Sajal-Kulshreshtha --- docs/source/procedures/creating-releases.rst | 100 +++++++++++++++---- 1 file changed, 80 insertions(+), 20 deletions(-) diff --git a/docs/source/procedures/creating-releases.rst b/docs/source/procedures/creating-releases.rst index 007db72441..7ef2b32a9c 100644 --- a/docs/source/procedures/creating-releases.rst +++ b/docs/source/procedures/creating-releases.rst @@ -2,49 +2,109 @@ The Augur Release Process ========================= The first step to releasing any changes is to have changes in the first place. -Augur's `CONTRIBUTING.md `__ file contains all the information that is needed to get started with topics like reporting issues, contributing code, and understanding the code review process. +Augur's `CONTRIBUTING.md `__ file +contains all the information that is needed to get started with topics like +reporting issues, contributing code, and understanding the code review process. This document outlines how these changes end up in an Augur release after they are merged into the `main` branch. +Release Workflow +---------------- - -Release workflow: -Starting after version 0.89.3, Augur follows a workflow similar to those you may already be familiar with (such as github flow and git flow). The Augur workflow has two long-lived branches, `main` and `release` and is designed such that changes only flow in one direction - from main into release. +Starting after version **0.89.3**, Augur follows a workflow similar to those you may already +be familiar with (such as GitHub Flow and Git Flow). 
The Augur workflow has two long-lived branches, +`main` and `release`, and is designed such that changes only flow in one direction — from `main` into `release`. Branches +-------- + +**main** -`main` -The `main` branch is the primary development branch that is the target for all new pull requests. At any given point in time, this branch represents the best approximation of what the next upcoming release will look like. Since this is the active development branch, changes happen more frequently and this branch should be considered to be less stable than the `release` branch due to the possibility of breaking changes being made (and potentially reverted) between releases. It is not recommended for production deployment and is primarily intended for use by Augur contributors running their own copies against test data for development purposes. +The `main` branch is the primary development branch that is the target for all new pull requests. +At any given point in time, this branch represents the best approximation of what the next upcoming +release will look like. Since this is the active development branch, changes happen more frequently +and this branch should be considered to be less stable than the `release` branch due to the possibility +of breaking changes being made (and potentially reverted) between releases. It is not recommended for +production deployment and is primarily intended for use by Augur contributors running their own copies +against test data for development purposes. -`release` -The `release` branch is where all augur versions (after 0.89.3) are tagged. Each commit on this branch represents either a hotfix to the prior release or a new major or minor version. +**release** -Currently, Augur only officially supports the last-released version represented by the latest **release** tag. 
In most cases, the latest commit on the `release` branch is made immediately prior to a release, but always rely on the latest tagged release, not the release branch in production. +The `release` branch is where all Augur versions (after 0.89.3) are tagged. Each commit on this branch +represents either a hotfix to the prior release or a new major or minor version. + +Currently, Augur only officially supports the last-released version represented by the latest **release** tag. +In most cases, the latest commit on the `release` branch is made immediately prior to a release, but always rely +on the latest tagged release, not the `release` branch in production. .. note:: - If future needs require supporting multiple Augur versions concurrently, individual numbered release branches may be made from this central `release`` branch to allow any hotfixes to be applied to each supported version independently of the others. + If future needs require supporting multiple Augur versions concurrently, individual numbered + release branches may be made from this central `release` branch to allow any hotfixes to be applied + to each supported version independently of the others. The Release Process +------------------- + +When the next release is set to be cut, some preparation steps need to take place first. These include: + +- Ensuring all features planned for that release are merged, and any unrelated changes are delayed (as appropriate) until after the release. +- Creating a Pull Request to update any applicable metadata (such as version information and changelogs) on the `main` branch. + +Version Management (Updated) +---------------------------- + +Starting from version **0.90.0**, Augur now uses a **single source of truth** for its version information, +defined in `metadata.py`. 
+ +Previously, the version number needed to be manually updated in several different places during a release, including: -When the next release is set to be cut, some preparation steps need to take place first, these include: -- Ensuring all features planned for that release are merged and any unrelated changes are delayed (as appropriate) until after the release. -- Creating a Pull Request to update any applicable metadata (such as version information and Changelogs) on the `main` branch. +- ``pyproject.toml`` (for Python packaging) +- Dockerfiles (used for building and tagging images) +- GitHub Actions workflow files (e.g., ``.github/workflows/build_docker.yml``) +- Any scripts or documentation pages referencing specific versions -Once all release preparation has been completed, a new Pull Request can be created to merge the main branch into the `release 'branch. This creates a final review opportunity and allows for another run of (potentially more stringent) CI jobs compared to those run on `main`, catching issues that may have come up throughout the various merges or in the process of preparing for release. +This manual process increased the chance of version mismatches between code, Docker images, and releases. -After this PR is merged, a tag is created that points to the commit on the `release` branch, effectively labeling it so that it can be returned to later if needed. This labeling process can also be the basis for additional CI jobs that build and upload the released code to distribution platforms such as Docker Hub or the GitHub Container Registry +Now, this has been **fully centralized**: + +- The version number is declared once in ``metadata.py`` as ``__version__``. +- A helper script ``get_version.py`` reads this value and dynamically injects it into Docker builds via a build argument. +- The CI/CD pipeline (GitHub Actions) also reads the same version from ``metadata.py`` when tagging builds and Docker images. 
+ +This ensures that all parts of Augur — including Python packaging, Docker images, and release artifacts — +use the **exact same version**, automatically. + +Therefore, before tagging a new release, only the version in ``metadata.py`` needs to be updated. +All other build and deployment steps automatically consume this version during the release process. + +Once all release preparation has been completed, a new Pull Request can be created to merge the `main` +branch into the `release` branch. This creates a final review opportunity and allows for another run of +(potentially more stringent) CI jobs compared to those run on `main`, catching issues that may have come up +throughout the various merges or during the process of preparing for release. + +After this PR is merged, a tag is created that points to the commit on the `release` branch, +effectively labeling it so that it can be returned to later if needed. This labeling process can +also be the basis for additional CI jobs that build and upload the released code to distribution +platforms such as Docker Hub or the GitHub Container Registry. Why? +---- This is done to solve a number of problems: -- having changes moving in two directions at once (i.e. features coming from main, and hotfixes coming from release) was often confusing and increased the odds that a change would be missed, such as being shipped as a hotfix but not merged into the main codebase - leading to a regression in the next release. +- Having changes moving in two directions at once (i.e. features coming from `main`, and hotfixes coming from `release`) + was often confusing and increased the odds that a change would be missed, such as being shipped as a hotfix + but not merged into the main codebase — leading to a regression in the next release. 
+ + +Special Case: Hotfixes +---------------------- +If the fix is a hotfix: -Special case: Hotfixes -if the fix was a hotfix: -- changelog updates and other metadata changes should be included as part of the PR -- this is where mergeify or something helps re-create the PR targeting the release branch directly. at which point the release process is followed +- Changelog updates and other metadata changes should be included as part of the PR. +- This is where tools like **Mergeify** can help re-create the PR targeting the `release` branch directly, + at which point the regular release process is followed. From fb88a929ed16ab5540eb9b6f67fd2c9c931e0ab0 Mon Sep 17 00:00:00 2001 From: Sajal-Kulshreshtha Date: Wed, 15 Oct 2025 23:43:15 +0530 Subject: [PATCH 028/105] removed unused import and script Signed-off-by: Sajal-Kulshreshtha --- .github/workflows/build_docker.yml | 2 +- scripts/ci/get_version.py | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) delete mode 100644 scripts/ci/get_version.py diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index 7828019514..75590ac0ad 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -61,7 +61,7 @@ jobs: - name: Extract project version id: version run: | - VERSION=$(python -c "import re; exec(open('metadata.py').read()); print(__version__)") + VERSION=$(python -c "exec(open('metadata.py').read()); print(__version__)") echo "version=$VERSION" >> $GITHUB_OUTPUT echo "Using version: $VERSION" diff --git a/scripts/ci/get_version.py b/scripts/ci/get_version.py deleted file mode 100644 index e98d520602..0000000000 --- a/scripts/ci/get_version.py +++ /dev/null @@ -1,7 +0,0 @@ -import sys -import os - -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) -from metadata import __version__ - -print(__version__) From cd87f5cbe421e216ee152f489ad470518747ae2d Mon Sep 17 00:00:00 2001 From: saksham23467 
<142910439+saksham23467@users.noreply.github.com> Date: Thu, 31 Jul 2025 17:57:26 +0530 Subject: [PATCH 029/105] Add Clones metric API (#2604) Signed-off-by: saksham23467 <142910439+saksham23467@users.noreply.github.com> --- augur/api/metrics/repo_meta.py | 56 ++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/augur/api/metrics/repo_meta.py b/augur/api/metrics/repo_meta.py index ffc8fc84ef..c39922e17b 100644 --- a/augur/api/metrics/repo_meta.py +++ b/augur/api/metrics/repo_meta.py @@ -1240,3 +1240,59 @@ def aggregate_summary(repo_group_id, repo_id=None, begin_date=None, end_date=Non results = pd.read_sql(summarySQL, conn, params={'repo_id': repo_id, 'begin_date': begin_date, 'end_date': end_date}) return results + +@register_metric() +def clones(repo_group_id, repo_id=None, begin_date=None, end_date=None): + """ + Returns the number of repository clones (total and unique) for a given repo or repo group. + :param repo_group_id: The repository's repo_group_id + :param repo_id: The repository's repo_id, defaults to None + :param begin_date: Start date for filtering clone data (optional) + :param end_date: End date for filtering clone data (optional) + :return: DataFrame of clone counts (total and unique) per day + """ + if not begin_date: + begin_date = '1970-1-1 00:00:00' + if not end_date: + end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + + if repo_id: + clones_sql = s.sql.text(""" + SELECT + repo_id, + clone_data_timestamp AS date, + count_clones AS total_clones, + unique_clones + FROM augur_data.repo_clones_data + WHERE repo_id = :repo_id + AND clone_data_timestamp BETWEEN :begin_date AND :end_date + ORDER BY clone_data_timestamp + """) + with current_app.engine.connect() as conn: + results = pd.read_sql(clones_sql, conn, params={ + 'repo_id': repo_id, + 'begin_date': begin_date, + 'end_date': end_date + }) + return results + else: + clones_sql = s.sql.text(""" + SELECT + repo_id, + clone_data_timestamp AS date, + 
count_clones AS total_clones, + unique_clones + FROM augur_data.repo_clones_data + WHERE repo_id IN ( + SELECT repo_id FROM augur_data.repo WHERE repo_group_id = :repo_group_id + ) + AND clone_data_timestamp BETWEEN :begin_date AND :end_date + ORDER BY repo_id, clone_data_timestamp + """) + with current_app.engine.connect() as conn: + results = pd.read_sql(clones_sql, conn, params={ + 'repo_group_id': repo_group_id, + 'begin_date': begin_date, + 'end_date': end_date + }) + return results From c088b8118634fa8a2094f2088179161e202f7aa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= <765740+giordano@users.noreply.github.com> Date: Fri, 17 Oct 2025 19:47:58 +0200 Subject: [PATCH 030/105] Fix formatting of wait until time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current formatting of time in "sleeping until" message doesn't include leading zeros for single-digit minutes (and hours), which is odd: ```python >>> import time >>> wait_until_time = time.localtime(1760684520) >>> f"sleeping until {wait_until_time.tm_hour}:{wait_until_time.tm_min}" 'sleeping until 8:2' >>> f"sleeping until {wait_until_time.tm_hour:02d}:{wait_until_time.tm_min:02d}" 'sleeping until 08:02' ``` Signed-off-by: Mosè Giordano <765740+giordano@users.noreply.github.com> --- augur/application/db/models/augur_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 9212bcc5e9..9bb8ef9104 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -939,7 +939,7 @@ def is_valid_github_repo(gh_session, url: str) -> bool: ) wait_until_time = localtime(wait_until) logger.error(f"rate limited fetching {url}") - logger.error(f"sleeping until {wait_until_time.tm_hour}:{wait_until_time.tm_min} ({wait_in_seconds} seconds)") + logger.error(f"sleeping until 
{wait_until_time.tm_hour:02d}:{wait_until_time.tm_min:02d} ({wait_in_seconds} seconds)") sleep(wait_in_seconds) attempts+=1 continue @@ -3600,4 +3600,4 @@ class RepoClone(Base): count_clones = Column(BigInteger) clone_data_timestamp = Column(TIMESTAMP(precision=6)) - repo = relationship("Repo") \ No newline at end of file + repo = relationship("Repo") From d5bb5391ee7b6b42534062c59b6fa7b2b622ed9a Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Tue, 21 Oct 2025 18:39:17 -0500 Subject: [PATCH 031/105] Remove extraneous log statement Signed-off-by: Ulincsys --- augur/application/db/lib.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index a82c97dd66..52efee87ee 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -217,8 +217,6 @@ def facade_bulk_insert_commits(logger, records): session.rollback() if len(records) > 1: - logger.error(f"Ran into issue when trying to insert commits \n Error: {e}") - #split list into halves and retry insert until we isolate offending record firsthalfRecords = records[:len(records)//2] secondhalfRecords = records[len(records)//2:] From 058120b296f5fb8b3a3ec3508fa80350df9e2c92 Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Wed, 22 Oct 2025 07:39:00 -0400 Subject: [PATCH 032/105] copy podman test cleanup step to docker build as well Add step to remove unnecessary files from Docker image Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- .github/workflows/build_docker.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index 75590ac0ad..2f82922617 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -55,6 +55,11 @@ jobs: name: End-to-end test (Docker) runs-on: ubuntu-latest steps: + - name: Remove unnecessary files from the base image + run: | + sudo rm -rf 
/usr/share/dotnet + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - name: Checkout repository uses: actions/checkout@v4 From ca257b63314653f59927355f5b20515d9d127eaa Mon Sep 17 00:00:00 2001 From: Mahmoud Abdelrazek Date: Tue, 21 Oct 2025 22:01:40 +0100 Subject: [PATCH 033/105] fix typo Signed-off-by: Mahmoud Abdelrazek --- augur/application/db/models/augur_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 9bb8ef9104..c80077d9b6 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -2885,7 +2885,7 @@ class PullRequestAssignee(Base): @classmethod def from_github(cls, assignee, repo_id, tool_source, tool_version, data_source): - pr_assignee_ojb = cls() + pr_assignee_obj = cls() # store the pr_url data on in the pr assignee data for now so we can relate it back to a pr later pr_assignee_obj.contrib_id = assignee["cntrb_id"] From 548f4298366d98b4eb3904031f6e719cd628001c Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Wed, 22 Oct 2025 21:39:25 +0530 Subject: [PATCH 034/105] Fixing the API Visibility issue in debug mode w/signoff Signed-off-by: PredictiveManish --- augur/tasks/util/random_key_auth.py | 68 ++++++++++++++++------------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/augur/tasks/util/random_key_auth.py b/augur/tasks/util/random_key_auth.py index f2fea35b36..d1f6bd3435 100644 --- a/augur/tasks/util/random_key_auth.py +++ b/augur/tasks/util/random_key_auth.py @@ -3,23 +3,31 @@ from httpx import Auth, Request, Response from random import choice +import hashlib +def mask_key(key: str, first: int = 6, last: int = 3, stars: int = 6) -> str: + """Mask key except for the first and last few characters.""" + if not isinstance(key, str) or len(key) <= (first + last): + return "*" * stars + return f"{key[:first]}{'*' * stars}{key[-last:]}" + + +def key_fingerprint(key: str, length: int = 
12) -> str: + """Return a short non-reversible fingerprint of the key for correlation.""" + h = hashlib.sha256(key.encode("utf-8")).hexdigest() + return h[:length] + class RandomKeyAuth(Auth): - """Custom Auth class for httpx that randomly assigns an api key to each request + """Custom Auth class for httpx that randomly assigns an API key to each request. Attributes: - list_of_keys ([str]): list of keys which are randomly selected from on each request - header_name (str): name of header that the keys need to be set to - key_format (str): format string that defines the structure of the key and leaves a {} for the key to be inserted + list_of_keys (List[str]): list of keys to choose from + header_name (str): name of header to set the key into + key_format (str): optional format string with {0} placeholder for key """ - - # pass a list of keys that are strings - # pass the name of the header that you would like to be set on the request - # Optionally pass the key_format. This is a string that contains a {} so the key can be added and applied to the header in the correct way. - # For example on github the keys are formatted like "token asdfasfdasf" where asdfasfdasf is the key. 
So for github - # the key_format="token {0}" + def __init__(self, list_of_keys: List[str], header_name: str, logger, key_format: Optional[str] = None): self.list_of_keys = list_of_keys self.header_name = header_name @@ -27,27 +35,27 @@ def __init__(self, list_of_keys: List[str], header_name: str, logger, key_format self.logger = logger def auth_flow(self, request: Request) -> Generator[Request, Response, None]: - - # the choice function is from the random library, and gets a random value from a list - # this gets a random key from the list - - if self.list_of_keys: - key_value = choice(self.list_of_keys) - self.logger.debug(f'Key value used in request: {key_value}') - # formats the key string into a format GitHub will accept - - if self.key_format: - key_string = self.key_format.format(key_value) - else: - key_string = key_value - - # set the headers of the request with the new key - request.headers[self.header_name] = key_string - #self.logger.info(f"List of Keys: {self.list_of_keys}") - + """Attach a randomly selected API key to the request headers.""" + if not self.list_of_keys: + self.logger.error("No valid keys available to make a request.") + yield request + return + + key_value = choice(self.list_of_keys) + + # Log only masked or hashed form, never the full key + masked = mask_key(key_value) + fingerprint = key_fingerprint(key_value) + self.logger.debug(f"Key used for request (masked): {masked} | fingerprint: {fingerprint}") + + # Apply formatting if needed + if self.key_format: + key_string = self.key_format.format(key_value) else: - self.logger.error(f"There are no valid keys to make a request with: {self.list_of_keys}") + key_string = key_value - # sends the request back with modified headers + # Set header + request.headers[self.header_name] = key_string + # sends the request back with modified headers # basically it saves our changes to the request object yield request From 17ec742e5dcc6d0df9a03fc46be155adcef69330 Mon Sep 17 00:00:00 2001 From: 
PredictiveManish Date: Wed, 22 Oct 2025 21:54:37 +0530 Subject: [PATCH 035/105] Fixing the warnings in #3183 w/signoff Signed-off-by: PredictiveManish --- docs/source/conf.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 45966f19ec..e925a59ffd 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,13 +19,15 @@ # import os import sys -import sphinx_rtd_theme + here = os.path.abspath(os.path.dirname(__file__)) -exec(open(os.path.join(here, "../../metadata.py")).read()) +# Add the project root (two levels up: docs/source → augur) +sys.path.insert(0, os.path.abspath(os.path.join(here, '../..'))) -sys.path.insert(0, os.path.abspath('../../../augur')) +# Now import metadata +from metadata import __copyright__, __release__, __version__ # -- General configuration ------------------------------------------------ From 6d45241abe72d1386809f19f0dc6945674fe40a1 Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Thu, 23 Oct 2025 09:40:49 +0530 Subject: [PATCH 036/105] Fixing warnings in #3183 w/signoff Signed-off-by: PredictiveManish --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index e925a59ffd..cf9c6ec7a8 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -26,7 +26,7 @@ # Add the project root (two levels up: docs/source → augur) sys.path.insert(0, os.path.abspath(os.path.join(here, '../..'))) -# Now import metadata +# Now importing variables from metadata.py from metadata import __copyright__, __release__, __version__ # -- General configuration ------------------------------------------------ From 0b86eff350fc1de76608cea49c46327865419592 Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Fri, 24 Oct 2025 12:20:40 +0530 Subject: [PATCH 037/105] Reverting changes Signed-off-by: PredictiveManish --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/docs/source/conf.py b/docs/source/conf.py index cf9c6ec7a8..94921bd5d1 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,7 +19,7 @@ # import os import sys - +import sphinx_rtd_theme here = os.path.abspath(os.path.dirname(__file__)) From 27049bb0c309af4cbf9cf2766e79e0ec98b43170 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 24 Oct 2025 11:00:15 -0400 Subject: [PATCH 038/105] allow materialized view refresh to be disabled Signed-off-by: Adrian Edwards --- augur/tasks/init/celery_app.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index db8d2239d4..d1209fadd0 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -241,8 +241,11 @@ def setup_periodic_tasks(sender, **kwargs): sender.add_periodic_task(thirty_days_in_seconds, non_repo_domain_tasks.s()) mat_views_interval = int(config.get_value('Celery', 'refresh_materialized_views_interval_in_days')) - logger.info(f"Scheduling refresh materialized view every night at 1am CDT") - sender.add_periodic_task(datetime.timedelta(days=mat_views_interval), refresh_materialized_views.s()) + if mat_views_interval > 0: + logger.info(f"Scheduling refresh materialized view every night at 1am CDT") + sender.add_periodic_task(datetime.timedelta(days=mat_views_interval), refresh_materialized_views.s()) + else: + logger.info(f"Refresh materialized view task is disabled.") # logger.info(f"Scheduling update of collection weights on midnight each day") # sender.add_periodic_task(crontab(hour=0, minute=0),augur_collection_update_weights.s()) From 8ad15bd833917944544345c88b2d6076279bc4a0 Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Sat, 25 Oct 2025 14:06:09 +0530 Subject: [PATCH 039/105] Reverting the unnecessary changes Signed-off-by: PredictiveManish --- augur/tasks/util/random_key_auth.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/augur/tasks/util/random_key_auth.py b/augur/tasks/util/random_key_auth.py index d1f6bd3435..7af17d81b9 100644 --- a/augur/tasks/util/random_key_auth.py +++ b/augur/tasks/util/random_key_auth.py @@ -20,12 +20,12 @@ def key_fingerprint(key: str, length: int = 12) -> str: class RandomKeyAuth(Auth): - """Custom Auth class for httpx that randomly assigns an API key to each request. + """Custom Auth class for httpx that randomly assigns an api key to each request. Attributes: - list_of_keys (List[str]): list of keys to choose from - header_name (str): name of header to set the key into - key_format (str): optional format string with {0} placeholder for key + list_of_keys (List[str]): list of keys which are randomly selected from on each request + header_name (str): name of header that the keys need to be set to + key_format (str): format string that defines the structure of the key and leaves a {} for the key to be inserted """ def __init__(self, list_of_keys: List[str], header_name: str, logger, key_format: Optional[str] = None): From 67000888c7d61a2d369bd9cf5a543d4b90cf5ee1 Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Sat, 25 Oct 2025 14:11:22 +0530 Subject: [PATCH 040/105] Moving mask_key() to augur.util Signed-off-by: PredictiveManish --- augur/tasks/util/random_key_auth.py | 8 +------- augur/util/keys.py | 5 +++++ 2 files changed, 6 insertions(+), 7 deletions(-) create mode 100644 augur/util/keys.py diff --git a/augur/tasks/util/random_key_auth.py b/augur/tasks/util/random_key_auth.py index 7af17d81b9..5d075f8202 100644 --- a/augur/tasks/util/random_key_auth.py +++ b/augur/tasks/util/random_key_auth.py @@ -4,13 +4,7 @@ from httpx import Auth, Request, Response from random import choice import hashlib - - -def mask_key(key: str, first: int = 6, last: int = 3, stars: int = 6) -> str: - """Mask key except for the first and last few characters.""" - if not isinstance(key, str) or len(key) <= (first + last): - return "*" * stars - return f"{key[:first]}{'*' * 
stars}{key[-last:]}" +from augur.util.keys import mask_key def key_fingerprint(key: str, length: int = 12) -> str: diff --git a/augur/util/keys.py b/augur/util/keys.py new file mode 100644 index 0000000000..31ef63d0cb --- /dev/null +++ b/augur/util/keys.py @@ -0,0 +1,5 @@ +def mask_key(key: str, first: int = 6, last: int = 3, stars: int = 6) -> str: + """Mask key except for the first and last few characters.""" + if not isinstance(key, str) or len(key) <= (first + last): + return "*" * stars + return f"{key[:first]}{'*' * stars}{key[-last:]}" \ No newline at end of file From bc9bd96969a67a05f09b7b47323a7f2f9d73a36b Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Sat, 25 Oct 2025 14:15:02 +0530 Subject: [PATCH 041/105] Removed key_fingerprint for easy interpretation Signed-off-by: PredictiveManish --- augur/tasks/util/random_key_auth.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/augur/tasks/util/random_key_auth.py b/augur/tasks/util/random_key_auth.py index 5d075f8202..dc59544aef 100644 --- a/augur/tasks/util/random_key_auth.py +++ b/augur/tasks/util/random_key_auth.py @@ -7,12 +7,6 @@ from augur.util.keys import mask_key -def key_fingerprint(key: str, length: int = 12) -> str: - """Return a short non-reversible fingerprint of the key for correlation.""" - h = hashlib.sha256(key.encode("utf-8")).hexdigest() - return h[:length] - - class RandomKeyAuth(Auth): """Custom Auth class for httpx that randomly assigns an api key to each request. 
@@ -39,8 +33,7 @@ def auth_flow(self, request: Request) -> Generator[Request, Response, None]: # Log only masked or hashed form, never the full key masked = mask_key(key_value) - fingerprint = key_fingerprint(key_value) - self.logger.debug(f"Key used for request (masked): {masked} | fingerprint: {fingerprint}") + self.logger.debug(f"Key used for request (masked): {masked}") # Apply formatting if needed if self.key_format: From f3f4065d8adf5c79c0b011016a83fee44967f4a3 Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Sun, 26 Oct 2025 10:08:30 +0530 Subject: [PATCH 042/105] Fix: python3.9 compatibility message in docs #3266 Signed-off-by: PredictiveManish --- docs/source/getting-started/installation.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index d2a79c4f71..9fa00dc291 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -31,8 +31,6 @@ Required: -**Python 3.9 is not yet supported because TensorFlow, which we use in our machine learning workers, does not yet support Python 3.9.** - Our REST API & data collection workers write in Python 3.6. We query the GitHub & GitLab API to collect data about issues, pull requests, contributors, and other information about a repository, so GitLab and GitHub access tokens are **required** for data collection. 
Optional: From c615eb0720ba60c64afd60e9305e92439081c778 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 15 Oct 2025 20:41:19 +0100 Subject: [PATCH 043/105] install mypy Signed-off-by: Adrian Edwards --- pyproject.toml | 1 + uv.lock | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index c086babe25..c3c9b98552 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ dev = [ "pytest==6.2.5", "toml>=0.10.2", "ipdb==0.13.9", + "mypy>=1.18.2", {include-group = "docs"}, ] docs = [ diff --git a/uv.lock b/uv.lock index 1c74a61de7..8604dfbb4c 100644 --- a/uv.lock +++ b/uv.lock @@ -218,6 +218,7 @@ dependencies = [ dev = [ { name = "docutils" }, { name = "ipdb" }, + { name = "mypy" }, { name = "pytest" }, { name = "setuptools" }, { name = "sphinx" }, @@ -316,6 +317,7 @@ requires-dist = [ dev = [ { name = "docutils", specifier = "==0.20.1" }, { name = "ipdb", specifier = "==0.13.9" }, + { name = "mypy", specifier = ">=1.18.2" }, { name = "pytest", specifier = "==6.2.5" }, { name = "setuptools" }, { name = "sphinx", specifier = "==7.2.6" }, @@ -2016,6 +2018,60 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ca/91/7dc28d5e2a11a5ad804cf2b7f7a5fcb1eb5a4966d66a5d2b41aee6376543/msgpack-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:6d489fba546295983abd142812bda76b57e33d0b9f5d5b71c09a583285506f69", size = 72341, upload-time = "2025-06-13T06:52:27.835Z" }, ] +[[package]] +name = "mypy" +version = "1.18.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mypy-extensions" }, + { name = "pathspec" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/77/8f0d0001ffad290cef2f7f216f96c814866248a0b92a722365ed54648e7e/mypy-1.18.2.tar.gz", hash = "sha256:06a398102a5f203d7477b2923dda3634c36727fa5c237d8f859ef90c42a9924b", size = 3448846, 
upload-time = "2025-09-19T00:11:10.519Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/6f/657961a0743cff32e6c0611b63ff1c1970a0b482ace35b069203bf705187/mypy-1.18.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eab0cf6294dafe397c261a75f96dc2c31bffe3b944faa24db5def4e2b0f77c", size = 12807973, upload-time = "2025-09-19T00:10:35.282Z" }, + { url = "https://files.pythonhosted.org/packages/10/e9/420822d4f661f13ca8900f5fa239b40ee3be8b62b32f3357df9a3045a08b/mypy-1.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a780ca61fc239e4865968ebc5240bb3bf610ef59ac398de9a7421b54e4a207e", size = 11896527, upload-time = "2025-09-19T00:10:55.791Z" }, + { url = "https://files.pythonhosted.org/packages/aa/73/a05b2bbaa7005f4642fcfe40fb73f2b4fb6bb44229bd585b5878e9a87ef8/mypy-1.18.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448acd386266989ef11662ce3c8011fd2a7b632e0ec7d61a98edd8e27472225b", size = 12507004, upload-time = "2025-09-19T00:11:05.411Z" }, + { url = "https://files.pythonhosted.org/packages/4f/01/f6e4b9f0d031c11ccbd6f17da26564f3a0f3c4155af344006434b0a05a9d/mypy-1.18.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f9e171c465ad3901dc652643ee4bffa8e9fef4d7d0eece23b428908c77a76a66", size = 13245947, upload-time = "2025-09-19T00:10:46.923Z" }, + { url = "https://files.pythonhosted.org/packages/d7/97/19727e7499bfa1ae0773d06afd30ac66a58ed7437d940c70548634b24185/mypy-1.18.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:592ec214750bc00741af1f80cbf96b5013d81486b7bb24cb052382c19e40b428", size = 13499217, upload-time = "2025-09-19T00:09:39.472Z" }, + { url = "https://files.pythonhosted.org/packages/9f/4f/90dc8c15c1441bf31cf0f9918bb077e452618708199e530f4cbd5cede6ff/mypy-1.18.2-cp310-cp310-win_amd64.whl", hash = "sha256:7fb95f97199ea11769ebe3638c29b550b5221e997c63b14ef93d2e971606ebed", size = 9766753, upload-time = "2025-09-19T00:10:49.161Z" 
}, + { url = "https://files.pythonhosted.org/packages/88/87/cafd3ae563f88f94eec33f35ff722d043e09832ea8530ef149ec1efbaf08/mypy-1.18.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:807d9315ab9d464125aa9fcf6d84fde6e1dc67da0b6f80e7405506b8ac72bc7f", size = 12731198, upload-time = "2025-09-19T00:09:44.857Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e0/1e96c3d4266a06d4b0197ace5356d67d937d8358e2ee3ffac71faa843724/mypy-1.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:776bb00de1778caf4db739c6e83919c1d85a448f71979b6a0edd774ea8399341", size = 11817879, upload-time = "2025-09-19T00:09:47.131Z" }, + { url = "https://files.pythonhosted.org/packages/72/ef/0c9ba89eb03453e76bdac5a78b08260a848c7bfc5d6603634774d9cd9525/mypy-1.18.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1379451880512ffce14505493bd9fe469e0697543717298242574882cf8cdb8d", size = 12427292, upload-time = "2025-09-19T00:10:22.472Z" }, + { url = "https://files.pythonhosted.org/packages/1a/52/ec4a061dd599eb8179d5411d99775bec2a20542505988f40fc2fee781068/mypy-1.18.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1331eb7fd110d60c24999893320967594ff84c38ac6d19e0a76c5fd809a84c86", size = 13163750, upload-time = "2025-09-19T00:09:51.472Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5f/2cf2ceb3b36372d51568f2208c021870fe7834cf3186b653ac6446511839/mypy-1.18.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3ca30b50a51e7ba93b00422e486cbb124f1c56a535e20eff7b2d6ab72b3b2e37", size = 13351827, upload-time = "2025-09-19T00:09:58.311Z" }, + { url = "https://files.pythonhosted.org/packages/c8/7d/2697b930179e7277529eaaec1513f8de622818696857f689e4a5432e5e27/mypy-1.18.2-cp311-cp311-win_amd64.whl", hash = "sha256:664dc726e67fa54e14536f6e1224bcfce1d9e5ac02426d2326e2bb4e081d1ce8", size = 9757983, upload-time = "2025-09-19T00:10:09.071Z" }, + { url = 
"https://files.pythonhosted.org/packages/07/06/dfdd2bc60c66611dd8335f463818514733bc763e4760dee289dcc33df709/mypy-1.18.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:33eca32dd124b29400c31d7cf784e795b050ace0e1f91b8dc035672725617e34", size = 12908273, upload-time = "2025-09-19T00:10:58.321Z" }, + { url = "https://files.pythonhosted.org/packages/81/14/6a9de6d13a122d5608e1a04130724caf9170333ac5a924e10f670687d3eb/mypy-1.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a3c47adf30d65e89b2dcd2fa32f3aeb5e94ca970d2c15fcb25e297871c8e4764", size = 11920910, upload-time = "2025-09-19T00:10:20.043Z" }, + { url = "https://files.pythonhosted.org/packages/5f/a9/b29de53e42f18e8cc547e38daa9dfa132ffdc64f7250e353f5c8cdd44bee/mypy-1.18.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d6c838e831a062f5f29d11c9057c6009f60cb294fea33a98422688181fe2893", size = 12465585, upload-time = "2025-09-19T00:10:33.005Z" }, + { url = "https://files.pythonhosted.org/packages/77/ae/6c3d2c7c61ff21f2bee938c917616c92ebf852f015fb55917fd6e2811db2/mypy-1.18.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01199871b6110a2ce984bde85acd481232d17413868c9807e95c1b0739a58914", size = 13348562, upload-time = "2025-09-19T00:10:11.51Z" }, + { url = "https://files.pythonhosted.org/packages/4d/31/aec68ab3b4aebdf8f36d191b0685d99faa899ab990753ca0fee60fb99511/mypy-1.18.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a2afc0fa0b0e91b4599ddfe0f91e2c26c2b5a5ab263737e998d6817874c5f7c8", size = 13533296, upload-time = "2025-09-19T00:10:06.568Z" }, + { url = "https://files.pythonhosted.org/packages/9f/83/abcb3ad9478fca3ebeb6a5358bb0b22c95ea42b43b7789c7fb1297ca44f4/mypy-1.18.2-cp312-cp312-win_amd64.whl", hash = "sha256:d8068d0afe682c7c4897c0f7ce84ea77f6de953262b12d07038f4d296d547074", size = 9828828, upload-time = "2025-09-19T00:10:28.203Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/04/7f462e6fbba87a72bc8097b93f6842499c428a6ff0c81dd46948d175afe8/mypy-1.18.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:07b8b0f580ca6d289e69209ec9d3911b4a26e5abfde32228a288eb79df129fcc", size = 12898728, upload-time = "2025-09-19T00:10:01.33Z" }, + { url = "https://files.pythonhosted.org/packages/99/5b/61ed4efb64f1871b41fd0b82d29a64640f3516078f6c7905b68ab1ad8b13/mypy-1.18.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed4482847168439651d3feee5833ccedbf6657e964572706a2adb1f7fa4dfe2e", size = 11910758, upload-time = "2025-09-19T00:10:42.607Z" }, + { url = "https://files.pythonhosted.org/packages/3c/46/d297d4b683cc89a6e4108c4250a6a6b717f5fa96e1a30a7944a6da44da35/mypy-1.18.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ad2afadd1e9fea5cf99a45a822346971ede8685cc581ed9cd4d42eaf940986", size = 12475342, upload-time = "2025-09-19T00:11:00.371Z" }, + { url = "https://files.pythonhosted.org/packages/83/45/4798f4d00df13eae3bfdf726c9244bcb495ab5bd588c0eed93a2f2dd67f3/mypy-1.18.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a431a6f1ef14cf8c144c6b14793a23ec4eae3db28277c358136e79d7d062f62d", size = 13338709, upload-time = "2025-09-19T00:11:03.358Z" }, + { url = "https://files.pythonhosted.org/packages/d7/09/479f7358d9625172521a87a9271ddd2441e1dab16a09708f056e97007207/mypy-1.18.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7ab28cc197f1dd77a67e1c6f35cd1f8e8b73ed2217e4fc005f9e6a504e46e7ba", size = 13529806, upload-time = "2025-09-19T00:10:26.073Z" }, + { url = "https://files.pythonhosted.org/packages/71/cf/ac0f2c7e9d0ea3c75cd99dff7aec1c9df4a1376537cb90e4c882267ee7e9/mypy-1.18.2-cp313-cp313-win_amd64.whl", hash = "sha256:0e2785a84b34a72ba55fb5daf079a1003a34c05b22238da94fcae2bbe46f3544", size = 9833262, upload-time = "2025-09-19T00:10:40.035Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/0c/7d5300883da16f0063ae53996358758b2a2df2a09c72a5061fa79a1f5006/mypy-1.18.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:62f0e1e988ad41c2a110edde6c398383a889d95b36b3e60bcf155f5164c4fdce", size = 12893775, upload-time = "2025-09-19T00:10:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/50/df/2cffbf25737bdb236f60c973edf62e3e7b4ee1c25b6878629e88e2cde967/mypy-1.18.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8795a039bab805ff0c1dfdb8cd3344642c2b99b8e439d057aba30850b8d3423d", size = 11936852, upload-time = "2025-09-19T00:10:51.631Z" }, + { url = "https://files.pythonhosted.org/packages/be/50/34059de13dd269227fb4a03be1faee6e2a4b04a2051c82ac0a0b5a773c9a/mypy-1.18.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ca1e64b24a700ab5ce10133f7ccd956a04715463d30498e64ea8715236f9c9c", size = 12480242, upload-time = "2025-09-19T00:11:07.955Z" }, + { url = "https://files.pythonhosted.org/packages/5b/11/040983fad5132d85914c874a2836252bbc57832065548885b5bb5b0d4359/mypy-1.18.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d924eef3795cc89fecf6bedc6ed32b33ac13e8321344f6ddbf8ee89f706c05cb", size = 13326683, upload-time = "2025-09-19T00:09:55.572Z" }, + { url = "https://files.pythonhosted.org/packages/e9/ba/89b2901dd77414dd7a8c8729985832a5735053be15b744c18e4586e506ef/mypy-1.18.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:20c02215a080e3a2be3aa50506c67242df1c151eaba0dcbc1e4e557922a26075", size = 13514749, upload-time = "2025-09-19T00:10:44.827Z" }, + { url = "https://files.pythonhosted.org/packages/25/bc/cc98767cffd6b2928ba680f3e5bc969c4152bf7c2d83f92f5a504b92b0eb/mypy-1.18.2-cp314-cp314-win_amd64.whl", hash = "sha256:749b5f83198f1ca64345603118a6f01a4e99ad4bf9d103ddc5a3200cc4614adf", size = 9982959, upload-time = "2025-09-19T00:10:37.344Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/e3/be76d87158ebafa0309946c4a73831974d4d6ab4f4ef40c3b53a385a66fd/mypy-1.18.2-py3-none-any.whl", hash = "sha256:22a1748707dd62b58d2ae53562ffc4d7f8bcc727e8ac7cbc69c053ddc874d47e", size = 2352367, upload-time = "2025-09-19T00:10:15.489Z" }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "networkx" version = "3.4.2" @@ -2168,6 +2224,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/e7/40fb618334dcdf7c5a316c0e7343c5cd82d3d866edc100d98e29bc945ecd/partd-1.4.2-py3-none-any.whl", hash = "sha256:978e4ac767ec4ba5b86c6eaa52e5a2a3bc748a2ca839e8cc798f1cc6ce6efb0f", size = 18905, upload-time = "2024-05-06T19:51:39.271Z" }, ] +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = 
"sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, +] + [[package]] name = "pexpect" version = "4.9.0" From 718034accfb87c9ef07221d085c677e1246a7236 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 16 Oct 2025 20:08:15 +0100 Subject: [PATCH 044/105] add mypy config to toml file Signed-off-by: Adrian Edwards --- pyproject.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index c3c9b98552..4ed15deea2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -150,3 +150,10 @@ constraint-dependencies = [ # https://docs.python.org/3.10/whatsnew/3.10.html#removed "graphql-server-core>1.1.1", ] + +[tool.mypy] +files = ['augur/application/db/util.py'] +ignore_missing_imports = true +follow_imports = "skip" +disallow_untyped_defs = false +exclude_gitignore = true From 2fbffd1e966e971726e7000ab2dec7b6b41e48cc Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 16 Oct 2025 20:08:43 +0100 Subject: [PATCH 045/105] add missing types packages as suggested by mypy Signed-off-by: Adrian Edwards --- pyproject.toml | 6 +++++- uv.lock | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4ed15deea2..1829b4e7d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,7 +100,11 @@ dev = [ "toml>=0.10.2", "ipdb==0.13.9", "mypy>=1.18.2", - {include-group = "docs"}, + "types-requests>=2.31.0.6", + "types-pyyaml>=6.0.12.20250915", + "types-python-dateutil>=2.9.0.20251008", + "types-toml>=0.10.8.20240310", + { include-group = "docs" }, ] docs = [ "docutils==0.20.1", diff --git a/uv.lock b/uv.lock index 8604dfbb4c..18681966a7 100644 --- a/uv.lock +++ b/uv.lock @@ -227,6 +227,10 @@ dev = [ { name = "sphinxcontrib-redoc" }, { name = "toml" }, { name = "tox" }, + { name = "types-python-dateutil" }, + { name = "types-pyyaml" }, + { name = "types-requests" }, + { name 
= "types-toml" }, ] docs = [ { name = "docutils" }, @@ -326,6 +330,10 @@ dev = [ { name = "sphinxcontrib-redoc", specifier = "==1.6.0" }, { name = "toml", specifier = ">=0.10.2" }, { name = "tox", specifier = "==3.24.4" }, + { name = "types-python-dateutil", specifier = ">=2.9.0.20251008" }, + { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, + { name = "types-requests", specifier = ">=2.31.0.6" }, + { name = "types-toml", specifier = ">=0.10.8.20240310" }, ] docs = [ { name = "docutils", specifier = "==0.20.1" }, @@ -3793,6 +3801,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, ] +[[package]] +name = "types-python-dateutil" +version = "2.9.0.20251008" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/83/24ed25dd0c6277a1a170c180ad9eef5879ecc9a4745b58d7905a4588c80d/types_python_dateutil-2.9.0.20251008.tar.gz", hash = "sha256:c3826289c170c93ebd8360c3485311187df740166dbab9dd3b792e69f2bc1f9c", size = 16128, upload-time = "2025-10-08T02:51:34.93Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/af/5d24b8d49ef358468ecfdff5c556adf37f4fd28e336b96f923661a808329/types_python_dateutil-2.9.0.20251008-py3-none-any.whl", hash = "sha256:b9a5232c8921cf7661b29c163ccc56055c418ab2c6eabe8f917cbcc73a4c4157", size = 17934, upload-time = "2025-10-08T02:51:33.55Z" }, +] + +[[package]] +name = "types-pyyaml" +version = "6.0.12.20250915" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/69/3c51b36d04da19b92f9e815be12753125bd8bc247ba0470a982e6979e71c/types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3", size = 17522, 
upload-time = "2025-09-15T03:01:00.728Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/e0/1eed384f02555dde685fff1a1ac805c1c7dcb6dd019c916fe659b1c1f9ec/types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6", size = 20338, upload-time = "2025-09-15T03:00:59.218Z" }, +] + +[[package]] +name = "types-requests" +version = "2.31.0.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "types-urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f9/b8/c1e8d39996b4929b918aba10dba5de07a8b3f4c8487bb61bb79882544e69/types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0", size = 15535, upload-time = "2023-09-27T06:19:38.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/a1/6f8dc74d9069e790d604ddae70cb46dcbac668f1bb08136e7b0f2f5cd3bf/types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9", size = 14516, upload-time = "2023-09-27T06:19:36.373Z" }, +] + +[[package]] +name = "types-toml" +version = "0.10.8.20240310" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/86/47/3e4c75042792bff8e90d7991aa5c51812cc668828cc6cce711e97f63a607/types-toml-0.10.8.20240310.tar.gz", hash = "sha256:3d41501302972436a6b8b239c850b26689657e25281b48ff0ec06345b8830331", size = 4392, upload-time = "2024-03-10T02:18:37.518Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/a2/d32ab58c0b216912638b140ab2170ee4b8644067c293b170e19fba340ccc/types_toml-0.10.8.20240310-py3-none-any.whl", hash = "sha256:627b47775d25fa29977d9c70dc0cbab3f314f32c8d8d0c012f2ef5de7aaec05d", size = 4777, upload-time = "2024-03-10T02:18:36.568Z" }, +] + +[[package]] +name = "types-urllib3" +version = "1.26.25.14" +source = { registry = "https://pypi.org/simple" } +sdist = { 
url = "https://files.pythonhosted.org/packages/73/de/b9d7a68ad39092368fb21dd6194b362b98a1daeea5dcfef5e1adb5031c7e/types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f", size = 11239, upload-time = "2023-07-20T15:19:31.307Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/7b/3fc711b2efea5e85a7a0bbfe269ea944aa767bbba5ec52f9ee45d362ccf3/types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e", size = 15377, upload-time = "2023-07-20T15:19:30.379Z" }, +] + [[package]] name = "typing-extensions" version = "4.7.1" From 7f5056e066f16574539a0c40821efd256234ac44 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 15 Oct 2025 22:04:13 +0100 Subject: [PATCH 046/105] broaden checking to other application db files too Signed-off-by: Adrian Edwards --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1829b4e7d1..a3866d86e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -156,7 +156,7 @@ constraint-dependencies = [ ] [tool.mypy] -files = ['augur/application/db/util.py'] +files = ['augur/application/db/*.py'] ignore_missing_imports = true follow_imports = "skip" disallow_untyped_defs = false From 85e317cd5af49d02b95e8bdf784367bfc88729eb Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 15 Oct 2025 22:23:39 +0100 Subject: [PATCH 047/105] type fixes for Application DB files Signed-off-by: Adrian Edwards --- augur/application/db/data_parse.py | 8 ++++---- augur/application/db/engine.py | 2 +- augur/application/db/lib.py | 22 ++++++++++++---------- augur/application/db/session.py | 17 ++++++++++------- 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index de0d9aaa81..eaa99fd394 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ 
-457,7 +457,7 @@ def extract_needed_gitlab_issue_label_data(labels: List[dict], repo_id: int, too -def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: """ Retrieve only the needed data for pr labels from the api response @@ -487,7 +487,7 @@ def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: return message_ref_dict # retrieve only the needed data for pr labels from the api response -def extract_needed_pr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_pr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: message_ref_dict = { 'pull_request_id': pull_request_id, @@ -1128,7 +1128,7 @@ def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, t return all_meta -def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: """ Extract the message id for a given message on an issue from an api response and connect it to the relevant repo id. 
@@ -1190,7 +1190,7 @@ def extract_needed_gitlab_message_data(comment: dict, platform_id: int, repo_id: return comment_dict -def extract_needed_gitlab_mr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_gitlab_mr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: """ Retrieve only the needed data for pr labels from the api response diff --git a/augur/application/db/engine.py b/augur/application/db/engine.py index 2870909093..0ea2bc1730 100644 --- a/augur/application/db/engine.py +++ b/augur/application/db/engine.py @@ -10,7 +10,7 @@ from augur.application.db.util import catch_operational_error -def parse_database_string(db_string: str) -> str: +def parse_database_string(db_string: str) -> tuple[str,str, str, str, str]: """Parse database string into the following components: username, password, host, port, database """ diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index 52efee87ee..09820168fc 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -271,31 +271,32 @@ def facade_bulk_insert_commits(logger, records): session.commit() else: raise e - - -def batch_insert_contributors(logger, data: Union[List[dict], dict]) -> Optional[List[dict]]: - batch_size = 1000 +def batch_insert_contributors(logger, data: Union[List[dict], dict], batch_size = 1000) -> Optional[List[dict]]: for i in range(0, len(data), batch_size): batch = data[i:i + batch_size] bulk_insert_dicts(logger, batch, Contributor, ['cntrb_id']) + + return None -def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: +def bulk_insert_dicts(logger, data_input: Union[List[dict], dict], table, 
natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: - if isinstance(data, list) is False: + if isinstance(data_input, list) is False: # if a dict is passed to data then # convert it to a list with one value - if isinstance(data, dict) is True: - data = [data] + if isinstance(data_input, dict) is True: + data = [data_input] else: logger.error("Data must be a list or a dict") return None + else: + data = list(data_input) if len(data) == 0: # self.logger.info("Gave no data to insert, returning...") @@ -397,8 +398,9 @@ def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys if deadlock_detected is True: logger.error("Made it through even though Deadlock was detected") - - return "success" + + # success + return None # othewise it gets the requested return columns and returns them as a list of dicts diff --git a/augur/application/db/session.py b/augur/application/db/session.py index a26fc172b7..661e989dd4 100644 --- a/augur/application/db/session.py +++ b/augur/application/db/session.py @@ -93,18 +93,20 @@ def fetchall_data_from_sql_text(self,sql_text): result = connection.execute(sql_text) return [dict(row) for row in result.mappings()] - def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: + def insert_data(self, data_input: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: - if isinstance(data, list) is False: + if isinstance(data_input, list) is False: # if a dict is passed to data then # convert it to a list with one value - if isinstance(data, dict) is True: - data = [data] + if isinstance(data_input, 
dict) is True: + data = [data_input] else: self.logger.info("Data must be a list or a dict") return None + else: + data = list(data_input) if len(data) == 0: # self.logger.info("Gave no data to insert, returning...") @@ -166,7 +168,7 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s # if there is no data to return then it executes the insert then returns nothing if not return_columns: - + # TODO: duplicate-looking code alert while attempts < 10: try: #begin keyword is needed for sqlalchemy 2.x @@ -205,8 +207,9 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s if deadlock_detected is True: self.logger.error("Made it through even though Deadlock was detected") - - return "success" + + # success + return None # othewise it gets the requested return columns and returns them as a list of dicts From b902e0d1ada0d9d2a95228d0a23c8e2f4c1af22f Mon Sep 17 00:00:00 2001 From: Manish Tiwari Date: Tue, 28 Oct 2025 13:00:52 +0530 Subject: [PATCH 048/105] Update docs/source/getting-started/installation.rst Co-authored-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Signed-off-by: Manish Tiwari --- docs/source/getting-started/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index 9fa00dc291..41bc1be4dc 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -31,7 +31,7 @@ Required: -Our REST API & data collection workers write in Python 3.6. We query the GitHub & GitLab API to collect data about issues, pull requests, contributors, and other information about a repository, so GitLab and GitHub access tokens are **required** for data collection. +Our REST API & data collection workers query the GitHub & GitLab API to collect data about issues, pull requests, contributors, and other information about a repository. 
Values for GitLab and GitHub access tokens are **required** for data collection and must be provided (an invalid token can be supplied if you don't plan to use one platform). Optional: From 15f1f27df6e7a62868511139b38003b838de3203 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Tue, 28 Oct 2025 17:53:56 -0500 Subject: [PATCH 049/105] testing Signed-off-by: Sean P. Goggins --- docker/empty_database/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/empty_database/Dockerfile b/docker/empty_database/Dockerfile index d4e3122450..cc375a0f84 100644 --- a/docker/empty_database/Dockerfile +++ b/docker/empty_database/Dockerfile @@ -1,5 +1,9 @@ from postgres:16 AS builder +RUN apt-get update && \ + apt-get install -y gcc python3-dev && \ + rm -rf /var/lib/apt/lists/* + ENV DEBIAN_FRONTEND=noninteractive # Install uv (https://docs.astral.sh/uv/guides/integration/docker/#installing-uv) From 5acb58b5d3dd43e7becb2896bec4f2971bfd0f95 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 29 Oct 2025 12:41:13 -0400 Subject: [PATCH 050/105] remove empty db image build Signed-off-by: Adrian Edwards --- .github/workflows/build_docker.yml | 1 - docker/empty_database/Dockerfile | 70 ------------------------------ 2 files changed, 71 deletions(-) delete mode 100644 docker/empty_database/Dockerfile diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index 2f82922617..3a0e3f953a 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -316,7 +316,6 @@ jobs: - database - keyman - rabbitmq - - empty_database runs-on: ubuntu-latest steps: - name: Checkout repository diff --git a/docker/empty_database/Dockerfile b/docker/empty_database/Dockerfile deleted file mode 100644 index cc375a0f84..0000000000 --- a/docker/empty_database/Dockerfile +++ /dev/null @@ -1,70 +0,0 @@ -from postgres:16 AS builder - -RUN apt-get update && \ - apt-get install -y gcc python3-dev && \ - rm -rf /var/lib/apt/lists/* -
-ENV DEBIAN_FRONTEND=noninteractive - -# Install uv (https://docs.astral.sh/uv/guides/integration/docker/#installing-uv) -COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ -ENV UV_COMPILE_BYTECODE=1 -# The uv package cache will be on a cache volume, so can't be linked -ENV UV_LINK_MODE=copy -# Assert that the lockfile (uv.lock) is up-to-date. Use `uv lock` to update it -# manually if this fails the container build. -ENV UV_LOCKED=1 - -WORKDIR /augur - -COPY pyproject.toml . -COPY uv.lock . -COPY .python-version . - -# Install augur's dependencies early to take advantage of build cache -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --no-install-project --no-dev - -# Copy in the actual code -# The RUN line below ensure that permissions are set correctly. -# This is the equivalent of the following docker --chmod flags, but done in a way thats compatible with podman. -# This can be removed once https://github.com/containers/buildah/issues/6066 or relevant equivalent is fixed -# - u=rw,u+X: user can read and write all files/dirs and execute directories -# - go=r,go+X: group and others can read all files/dirs and execute directories -COPY README.md . -COPY LICENSE . -COPY alembic.ini . -COPY augur/ augur/ -COPY metadata.py . 
-COPY scripts/ scripts/ - -RUN find augur -type d -exec chmod u=rwx,go=rx {} + && find augur -type f -exec chmod u=rw,go=r {} + - -RUN find scripts -exec chmod u=rwx,go=rx {} + - -# Install the main project -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --no-dev - -# We aren't going to activate the virtualenv (manually or via uv run), so we -# need adjust the PATH -ENV PATH="/augur/.venv/bin:${PATH}" - -ENV POSTGRES_DB="augur" -ENV POSTGRES_USER="augur" -ENV POSTGRES_PASSWORD="augur" -ENV AUGUR_DB="postgresql+psycopg2://augur:augur@localhost:5432/augur" -# ENV PGDATA="/var/lib/postgresql/data" - -RUN set -e && \ - gosu postgres initdb && \ - gosu postgres pg_ctl -D "$PGDATA" -o "-c listen_addresses='localhost'" -w start && \ - gosu postgres psql -c "CREATE USER ${POSTGRES_USER} WITH SUPERUSER PASSWORD '${POSTGRES_PASSWORD}';" && \ - gosu postgres psql -c "CREATE DATABASE ${POSTGRES_DB} OWNER ${POSTGRES_USER};" && \ - augur db create-schema && \ - gosu postgres pg_ctl -D "$PGDATA" -m fast -w stop - - -FROM postgres:16 - -COPY --from=builder /var/lib/postgresql/data /var/lib/postgresql/data From a10228a23fafe612f56a83daa32391081769463b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 29 Oct 2025 15:24:05 -0400 Subject: [PATCH 051/105] fetch facade worker options from the correct config Signed-off-by: Adrian Edwards --- augur/application/config.py | 1 - augur/tasks/git/util/facade_worker/facade_worker/config.py | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 2cc6f65cdb..1af08a7aa3 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -38,7 +38,6 @@ def get_development_flag(): "github": "", "gitlab": "" }, - #TODO: a lot of these are deprecated. 
"Facade": { "check_updates": 1, "create_xlsx_summary_files": 1, diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index f060b34390..22a73a78be 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -36,7 +36,6 @@ from augur.application.db.session import DatabaseSession from augur.application.db.lib import execute_sql -from augur.application.db.lib import get_section from logging import Logger logger = logging.getLogger(__name__) @@ -110,7 +109,10 @@ def __init__(self,logger: Logger): self.logger = logger - worker_options = get_section("Facade") + with DatabaseSession(logger, engine) as session: + config = AugurConfig(logger, session) + + worker_options = config.get_section("Facade") self.limited_run = worker_options["limited_run"] self.delete_marked_repos = worker_options["delete_marked_repos"] From 410f025c76fc5ad0254261efab6c85766e4bef21 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 29 Oct 2025 15:24:56 -0400 Subject: [PATCH 052/105] refactor the getting of enabled phase names to accept explicit parameters of varying types Signed-off-by: Adrian Edwards --- augur/api/routes/dei.py | 2 +- augur/tasks/start_tasks.py | 4 ++-- augur/tasks/util/collection_util.py | 12 +++++++++--- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/augur/api/routes/dei.py b/augur/api/routes/dei.py index 621c89604d..646081ba2c 100644 --- a/augur/api/routes/dei.py +++ b/augur/api/routes/dei.py @@ -69,7 +69,7 @@ def dei_track_repo(application: ClientApplication): "repo_id": repo_id } - enabled_phase_names = get_enabled_phase_names_from_config() + enabled_phase_names = get_enabled_phase_names_from_config_session(session, logger) #Primary collection hook. 
primary_enabled_phases = [] diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 3a61e391a8..e0f9c36c37 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -140,7 +140,7 @@ def non_repo_domain_tasks(self): logger.info("Executing non-repo domain tasks") - enabled_phase_names = get_enabled_phase_names_from_config() + enabled_phase_names = get_enabled_phase_names_from_config(engine, logger) enabled_tasks = [] @@ -245,7 +245,7 @@ def augur_collection_monitor(self): #Get list of enabled phases - enabled_phase_names = get_enabled_phase_names_from_config() + enabled_phase_names = get_enabled_phase_names_from_config(engine, logger) enabled_collection_hooks = [] diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index 28489d63c8..fdcdd68cac 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -10,7 +10,6 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.application.db.models import CollectionStatus, Repo from augur.application.db.util import execute_session_query -from augur.application.db.lib import get_section from augur.tasks.github.util.util import get_repo_weight_core, get_repo_weight_by_issue from augur.application.db import get_engine from augur.application.db.lib import execute_sql, get_session, get_active_repo_count, get_repo_by_repo_git @@ -204,9 +203,16 @@ def get_repos_for_recollection(session, limit, hook, days_until_collect_again): return valid_repo_git_list -def get_enabled_phase_names_from_config(): +def get_enabled_phase_names_from_config(engine, logger): + with DatabaseSession(logger, engine) as session: + return get_enabled_phase_names_from_config_session(session, logger) - phase_options = get_section("Task_Routine") + +def get_enabled_phase_names_from_config_session(session, logger): + + config = AugurConfig(logger, session) + + phase_options = config.get_section("Task_Routine") #Get list of enabled phases 
enabled_phase_names = [name for name, phase in phase_options.items() if phase == 1] From c1a0bf6745cdfbed46e365e3a90cea37f67560f2 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 29 Oct 2025 15:25:07 -0400 Subject: [PATCH 053/105] remove legacy FacadeConfig comments Signed-off-by: Adrian Edwards --- .../facade_worker/facade_worker/config.py | 253 ------------------ 1 file changed, 253 deletions(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index 22a73a78be..4ca4d1d134 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -253,256 +253,3 @@ def insert_or_update_data(self, query, **bind_args)-> None: return def inc_repos_processed(self): self.repos_processed += 1 - -# def get_last_collected_commit_date(self,repo_id): -# commit_date_query = s.sql.text(""" -# SELECT cmt_committer_timestamp FROM commits -# WHERE repo_id=:repo_id -# ORDER BY data_collection_date DESC -# LIMIT 1; -# """).bindparams(repo_id=repo_id) -# -# result = execute_sql(commit_date_query).fetchone() -# return result[0] - -""" -class FacadeConfig: - \"""Legacy facade config that holds facade's database functionality - - This is mainly for compatibility with older functions from legacy facade. - - Initializes database when it encounters a database exception - - Attributes: - repos_processed (int): Counter for how many repos have been analyzed - cursor (psycopg2.extensions.cursor): database cursor for legacy facade. - logger (Logger): logger object inherited from the session object - db (psycopg2.extensions.connection): database connection object for legacy facade. - tool_source (str): String marking the source of data as from facade. - data_source (str): String indicating that facade gets data from git - tool_version (str): Facade version - worker_options (dict): Config options for facade. 
- log_level (str): Keyword indicating level of logging for legacy facade. - \""" - def __init__(self, logger: Logger): - self.repos_processed = 0 - self.cursor = None - self.logger = logger - - self.db = None - - #init db first thing - db_credentials = get_database_args_from_env() - - # Set up the database - db_user = db_credentials["db_user"] - db_pass = db_credentials["db_pass"] - db_name = db_credentials["db_name"] - db_host = db_credentials["db_host"] - db_port = db_credentials["db_port"] - db_user_people = db_user - db_pass_people = db_pass - db_name_people = db_name - db_host_people = db_host - db_port_people = db_port - # Open a general-purpose connection - db,cursor = self.database_connection( - db_host, - db_user, - db_pass, - db_name, - db_port, False, False) - - #worker_options = read_config("Workers", "facade_worker", None, None) - - with DatabaseSession(logger, engine) as session: - config = AugurConfig(logger, session) - worker_options = config.get_section("Facade") - - if 'repo_directory' in worker_options: - self.repo_base_directory = worker_options['repo_directory'] - else: - self.log_activity('Error',"Please specify a \'repo_directory\' parameter" - " in your \'Workers\' -> \'facade_worker\' object in your config " - "to the directory in which you want to clone repos. 
Exiting...") - sys.exit(1) - - self.tool_source = '\'Facade \'' - self.tool_version = '\'1.3.0\'' - self.data_source = '\'Git Log\'' - - self.worker_options = worker_options - - # Figure out how much we're going to log - #logging.basicConfig(filename='worker_{}.log'.format(worker_options['port']), filemode='w', level=logging.INFO) - self.log_level = None #self.get_setting('log_level') - - - #### Database update functions #### - - def increment_db(self, version): - - # Helper function to increment the database number - - increment_db = ("INSERT INTO settings (setting,value) " - "VALUES ('database_version',%s)") - self.cursor.execute(increment_db, (version, )) - db.commit() - - print("Database updated to version: %s" % version) - - def update_db(self, version): - - # This function should incrementally step any version of the database up to - # the current schema. After executing the database operations, call - # increment_db to bring it up to the version with which it is now compliant. - - print("Legacy Facade Block for DB UPDATE. No longer used. ") - - print("No further database updates.\n") - - def migrate_database_config(self): - - # Since we're changing the way we store database credentials, we need a way to - # transparently migrate anybody who was using the old file. Someday after a long - # while this can disappear. 
- - try: - # If the old database config was found, write a new config - imp.find_module('db') - - db_config = configparser.ConfigParser() - - from db import db_user,db_pass,db_name,db_host - from db import db_user_people,db_pass_people,db_name_people,db_host_people - - db_config.add_section('main_database') - db_config.set('main_database','user',db_user) - db_config.set('main_database','pass',db_pass) - db_config.set('main_database','name',db_name) - db_config.set('main_database','host',db_host) - - db_config.add_section('people_database') - db_config.set('people_database','user',db_user_people) - db_config.set('people_database','pass',db_pass_people) - db_config.set('people_database','name',db_name_people) - db_config.set('people_database','host',db_host_people) - - with open('db.cfg','w') as db_file: - db_config.write(db_file) - - print("Migrated old style config file to new.") - except: - # If nothing is found, the user probably hasn't run setup yet. - sys.exit("Can't find database config. Have you run setup.py?") - - try: - os.remove('db.py') - os.remove('db.pyc') - print("Removed unneeded config files") - except: - print("Attempted to remove unneeded config files") - - return db_user,db_pass,db_name,db_host,db_user_people,db_pass_people,db_name_people,db_host_people - - #### Global helper functions #### - - def database_connection(self, db_host,db_user,db_pass,db_name, db_port, people, multi_threaded_connection): - - # Return a database connection based upon which interpreter we're using. CPython - # can use any database connection, although MySQLdb is preferred over pymysql - # for performance reasons. However, PyPy can't use MySQLdb at this point, - # instead requiring a pure python MySQL client. This function returns a database - # connection that should provide maximum performance depending upon the - # interpreter in use. - - ##TODO: Postgres connections as we make them ARE threadsafe. 
We *could* refactor this accordingly: https://www.psycopg.org/docs/connection.html #noturgent - - - # if platform.python_implementation() == 'PyPy': - db_schema = 'augur_data' - db = psycopg2.connect( - host = db_host, - user = db_user, - password = db_pass, - database = db_name, - # charset = 'utf8mb4', - port = db_port, - options=f'-c search_path={db_schema}', - connect_timeout = 31536000,) - - cursor = db.cursor()#pymysql.cursors.DictCursor) - - - self.cursor = cursor - self.db = db - - # Figure out how much we're going to log - #self.log_level = self.get_setting('log_level') - #Not getting debug logging for some reason. - self.log_level = 'Debug' - return db, cursor - - def get_setting(self, setting): - - # Get a setting from the database - - query = (\"""SELECT value FROM settings WHERE setting=%s ORDER BY - last_modified DESC LIMIT 1\""") - self.cursor.execute(query, (setting, )) - # print(type(self.cursor.fetchone())) - return self.cursor.fetchone()[0]#["value"] - - def update_status(self, status): - - # Update the status displayed in the UI - - query = ("UPDATE settings SET value=%s WHERE setting='utility_status'") - self.cursor.execute(query, (status, )) - self.db.commit() - - def log_activity(self, level, status): - - # Log an activity based upon urgency and user's preference. If the log level is - # "Debug", then just print it and don't save it in the database. 
- - log_options = ('Error','Quiet','Info','Verbose','Debug') - self.logger.info("* %s\n" % status) - if self.log_level == 'Debug' and level == 'Debug': - return - - #if log_options.index(level) <= log_options.index(self.log_level): - query = ("INSERT INTO utility_log (level,status) VALUES (%s,%s)") - try: - self.cursor.execute(query, (level, status)) - self.db.commit() - except Exception as e: - self.logger.info('Error encountered: {}\n'.format(e)) - - db_credentials = get_database_args_from_env() - - # Set up the database - db_user = db_credentials["db_user"] - db_pass = db_credentials["db_pass"] - db_name = db_credentials["db_name"] - db_host = db_credentials["db_host"] - db_port = db_credentials["db_port"] - db_user_people = db_user - db_pass_people = db_pass - db_name_people = db_name - db_host_people = db_host - db_port_people = db_port - # Open a general-purpose connection - db,cursor = self.database_connection( - db_host, - db_user, - db_pass, - db_name, - db_port, False, False) - self.cursor.execute(query, (level, status)) - self.db.commit() - - - - -""" From 03ff84983dfa1806eee91327031174c81c7007b0 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 29 Oct 2025 15:26:18 -0400 Subject: [PATCH 054/105] remove unused lib.get_section Signed-off-by: Adrian Edwards --- augur/application/db/lib.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index 09820168fc..d7db4c3e43 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -19,34 +19,6 @@ logger = logging.getLogger("db_lib") -def get_section(section_name) -> dict: - """Get a section of data from the config. 
- - Args: - section_name: The name of the section being retrieved - - Returns: - The section data as a dict - """ - with get_session() as session: - - query = session.query(Config).filter_by(section_name=section_name) - section_data = execute_session_query(query, 'all') - - section_dict = {} - for setting in section_data: - setting_dict = setting.__dict__ - - setting_dict = convert_type_of_value(setting_dict, logger) - - setting_name = setting_dict["setting_name"] - setting_value = setting_dict["value"] - - section_dict[setting_name] = setting_value - - return section_dict - - def get_value(section_name: str, setting_name: str) -> Optional[Any]: """Get the value of a setting from the config. From 2008e7f5397ee497b79e59f90049b0d1a936c706 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 29 Oct 2025 16:27:00 -0400 Subject: [PATCH 055/105] refactor again to add a function that can get enabled phase names from an existing config object Signed-off-by: Adrian Edwards --- augur/tasks/util/collection_util.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index fdcdd68cac..650052bbb2 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -211,7 +211,10 @@ def get_enabled_phase_names_from_config(engine, logger): def get_enabled_phase_names_from_config_session(session, logger): config = AugurConfig(logger, session) + return get_enabled_phase_names_from_config_object(config) + +def get_enabled_phase_names_from_config_object(config): phase_options = config.get_section("Task_Routine") #Get list of enabled phases From 462ed6a10bfe38219e6a65ace4dbcdaff5ad6b4f Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 28 Oct 2025 20:34:07 -0400 Subject: [PATCH 056/105] apply coalesce to values being bulk-added so that null values get skipped Signed-off-by: Adrian Edwards Co-authored-by: Mahmoud Abdelrazek <44040283+razekmh@users.noreply.github.com> 
Co-authored-by: Andrew Brain --- augur/application/db/lib.py | 26 ++++++++++++++++++++++++-- augur/application/db/session.py | 3 ++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index 09820168fc..c90aa41f94 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -284,6 +284,28 @@ def batch_insert_contributors(logger, data: Union[List[dict], dict], batch_size def bulk_insert_dicts(logger, data_input: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: + """ Provides bulk-insert/update (upsert) capabilities for adding bulk data (as a column:value dict mapping) into a specific table + + Args: + logger (_type_): the logger to use + data_input (Union[List[dict], dict]): the dicts to upsert (must match the column names as defined in the schema for the table) + table (_type_): the table to upsert the data into + natural_keys (List[str]): the columns that define the natural unique keys for the data + return_columns (Optional[List[str]], optional): list of the column names to return. Defaults to None. + string_fields (Optional[List[str]], optional): list of keys in the incoming dicts that should be cleaned to handle bad characters postgres doesn't like. Defaults to None. + on_conflict_update (bool, optional): whether to update on conflict. Defaults to True. + + Raises: + e: _description_ + e: _description_ + Exception: _description_ + e: _description_ + e: _description_ + Exception: _description_ + + Returns: + Optional[List[dict]]: the original data with each item filtered to only contain the columns specified by `return_columns`, if present.
+ """ if isinstance(data_input, list) is False: @@ -333,7 +355,7 @@ def bulk_insert_dicts(logger, data_input: Union[List[dict], dict], table, natura # create a dict that the on_conflict_do_update method requires to be able to map updates whenever there is a conflict. See sqlalchemy docs for more explanation and examples: https://docs.sqlalchemy.org/en/14/dialects/postgresql.html#updating-using-the-excluded-insert-values setDict = {} for key in data[0].keys(): - setDict[key] = getattr(stmnt.excluded, key) + setDict[key] = func.coalesce(getattr(stmnt.excluded, key), getattr(table.c, key)) stmnt = stmnt.on_conflict_do_update( #This might need to change @@ -604,4 +626,4 @@ def get_repo_group_by_name(name): with get_session() as session: return session.query(RepoGroup).filter(RepoGroup.rg_name == name).first() - \ No newline at end of file + diff --git a/augur/application/db/session.py b/augur/application/db/session.py index 661e989dd4..0da439a252 100644 --- a/augur/application/db/session.py +++ b/augur/application/db/session.py @@ -3,6 +3,7 @@ from sqlalchemy.orm import Session from sqlalchemy.dialects import postgresql from sqlalchemy.exc import OperationalError +from sqlalchemy import func from typing import Optional, List, Union from psycopg2.errors import DeadlockDetected @@ -143,7 +144,7 @@ def insert_data(self, data_input: Union[List[dict], dict], table, natural_keys: # create a dict that the on_conflict_do_update method requires to be able to map updates whenever there is a conflict. 
See sqlalchemy docs for more explanation and examples: https://docs.sqlalchemy.org/en/14/dialects/postgresql.html#updating-using-the-excluded-insert-values setDict = {} for key in data[0].keys(): - setDict[key] = getattr(stmnt.excluded, key) + setDict[key] = func.coalesce(getattr(stmnt.excluded, key), getattr(table.c, key)) stmnt = stmnt.on_conflict_do_update( #This might need to change From f5acdf59cbb4deef0fde2d6ef1c8a88f631dbb27 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 29 Oct 2025 12:49:38 -0400 Subject: [PATCH 057/105] apply fix to allow this solution to work when `table` arg is an ORM object as well. Co-Created by: gpt-5 via cursor Signed-off-by: Adrian Edwards --- augur/application/db/lib.py | 4 +++- augur/application/db/session.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index c90aa41f94..d167b3755c 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -354,8 +354,10 @@ def bulk_insert_dicts(logger, data_input: Union[List[dict], dict], table, natura # create a dict that the on_conflict_do_update method requires to be able to map updates whenever there is a conflict. 
See sqlalchemy docs for more explanation and examples: https://docs.sqlalchemy.org/en/14/dialects/postgresql.html#updating-using-the-excluded-insert-values setDict = {} + base_table = getattr(table, "__table__", table) for key in data[0].keys(): - setDict[key] = func.coalesce(getattr(stmnt.excluded, key), getattr(table.c, key)) + existing_col = getattr(base_table.c, key) + setDict[key] = func.coalesce(getattr(stmnt.excluded, key), existing_col) stmnt = stmnt.on_conflict_do_update( #This might need to change diff --git a/augur/application/db/session.py b/augur/application/db/session.py index 0da439a252..920b6fe6b0 100644 --- a/augur/application/db/session.py +++ b/augur/application/db/session.py @@ -143,8 +143,10 @@ def insert_data(self, data_input: Union[List[dict], dict], table, natural_keys: # create a dict that the on_conflict_do_update method requires to be able to map updates whenever there is a conflict. See sqlalchemy docs for more explanation and examples: https://docs.sqlalchemy.org/en/14/dialects/postgresql.html#updating-using-the-excluded-insert-values setDict = {} + base_table = getattr(table, "__table__", table) for key in data[0].keys(): - setDict[key] = func.coalesce(getattr(stmnt.excluded, key), getattr(table.c, key)) + existing_col = getattr(base_table.c, key) + setDict[key] = func.coalesce(getattr(stmnt.excluded, key), existing_col) stmnt = stmnt.on_conflict_do_update( #This might need to change From 6e47e06533219d30573fd11dfe6b26b3f03821bd Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 29 Oct 2025 17:16:30 -0400 Subject: [PATCH 058/105] fix missing imports Signed-off-by: Adrian Edwards --- augur/tasks/git/util/facade_worker/facade_worker/config.py | 1 + augur/tasks/util/collection_util.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index 4ca4d1d134..21fe424d10 100644 --- 
a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -35,6 +35,7 @@ from psycopg2.errors import DeadlockDetected from augur.application.db.session import DatabaseSession +from augur.application.config import AugurConfig from augur.application.db.lib import execute_sql from logging import Logger diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index 650052bbb2..3f5ba75434 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -15,6 +15,8 @@ from augur.application.db.lib import execute_sql, get_session, get_active_repo_count, get_repo_by_repo_git from augur.tasks.util.worker_util import calculate_date_weight_from_timestamps from augur.tasks.util.collection_state import CollectionState +from augur.application.db.session import DatabaseSession +from augur.application.config import AugurConfig def get_list_of_all_users(): From e29a1d8728fb08ef3ac474f7633cfdc3db3cbfc8 Mon Sep 17 00:00:00 2001 From: mohsinm-dev Date: Thu, 30 Oct 2025 10:06:05 +0500 Subject: [PATCH 059/105] Fix hardcoded collection intervals by reading from config at call time --- augur/application/config.py | 6 +++++- augur/tasks/start_tasks.py | 23 ++++++++++++++++++----- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 2cc6f65cdb..89e8fc57f0 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -80,7 +80,11 @@ def get_development_flag(): "connection_string": "amqp://augur:password123@localhost:5672/augur_vhost" }, "Tasks": { - "collection_interval": 30 + "collection_interval": 30, + "core_collection_interval": 15, + "secondary_collection_interval": 10, + "facade_collection_interval": 10, + "ml_collection_interval": 40 }, "Message_Insights": { "insight_days": 30, diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 
3a61e391a8..07c44c9bf7 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -28,7 +28,7 @@ from augur.tasks.util.collection_state import CollectionState from augur.tasks.util.collection_util import * from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_facade_weight_time_factor -from augur.application.db.lib import execute_sql, get_session +from augur.application.db.lib import execute_sql, get_session, get_section RUNNING_DOCKER = os.environ.get('AUGUR_DOCKER_DEPLOY') == "1" @@ -175,7 +175,10 @@ def core_task_success_util_gen(repo_git, full_collection): primary_enabled_phases.append(core_task_success_util_gen) primary_gitlab_enabled_phases.append(core_task_success_util_gen) - primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=15, gitlab_phases=primary_gitlab_enabled_phases) + # Get core collection interval from config + tasks_config = get_section("Tasks") + core_interval = tasks_config.get('core_collection_interval', 15) + primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=core_interval, gitlab_phases=primary_gitlab_enabled_phases) primary_request.get_valid_repos(session) return primary_request @@ -193,7 +196,11 @@ def secondary_task_success_util_gen(repo_git, full_collection): return secondary_task_success_util.si(repo_git) secondary_enabled_phases.append(secondary_task_success_util_gen) - request = CollectionRequest("secondary",secondary_enabled_phases,max_repo=60, days_until_collect_again=10) + + # Get secondary collection interval from config + tasks_config = get_section("Tasks") + secondary_interval = tasks_config.get('secondary_collection_interval', 10) + request = CollectionRequest("secondary",secondary_enabled_phases,max_repo=60, days_until_collect_again=secondary_interval) request.get_valid_repos(session) return request @@ -215,7 +222,10 @@ def facade_task_update_weight_util_gen(repo_git, 
full_collection): facade_enabled_phases.append(facade_task_update_weight_util_gen) - request = CollectionRequest("facade",facade_enabled_phases,max_repo=30, days_until_collect_again=10) + # Get facade collection interval from config + tasks_config = get_section("Tasks") + facade_interval = tasks_config.get('facade_collection_interval', 10) + request = CollectionRequest("facade",facade_enabled_phases,max_repo=30, days_until_collect_again=facade_interval) request.get_valid_repos(session) return request @@ -230,7 +240,10 @@ def ml_task_success_util_gen(repo_git, full_collection): ml_enabled_phases.append(ml_task_success_util_gen) - request = CollectionRequest("ml",ml_enabled_phases,max_repo=5, days_until_collect_again=40) + # Get ML collection interval from config + tasks_config = get_section("Tasks") + ml_interval = tasks_config.get('ml_collection_interval', 40) + request = CollectionRequest("ml",ml_enabled_phases,max_repo=5, days_until_collect_again=ml_interval) request.get_valid_repos(session) return request From 9ad93b6ac28fcc57caee61a92db602aea620193a Mon Sep 17 00:00:00 2001 From: mohsinm-dev Date: Fri, 31 Oct 2025 07:06:37 +0500 Subject: [PATCH 060/105] Move config injection to call site and use days_until_collect_again parameter --- augur/tasks/start_tasks.py | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 07c44c9bf7..4a46f84705 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -28,7 +28,8 @@ from augur.tasks.util.collection_state import CollectionState from augur.tasks.util.collection_util import * from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_facade_weight_time_factor -from augur.application.db.lib import execute_sql, get_session, get_section +from augur.application.db.lib import execute_sql, get_session +from augur.application.config import AugurConfig RUNNING_DOCKER = 
os.environ.get('AUGUR_DOCKER_DEPLOY') == "1" @@ -175,10 +176,7 @@ def core_task_success_util_gen(repo_git, full_collection): primary_enabled_phases.append(core_task_success_util_gen) primary_gitlab_enabled_phases.append(core_task_success_util_gen) - # Get core collection interval from config - tasks_config = get_section("Tasks") - core_interval = tasks_config.get('core_collection_interval', 15) - primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=core_interval, gitlab_phases=primary_gitlab_enabled_phases) + primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=days_until_collect_again, gitlab_phases=primary_gitlab_enabled_phases) primary_request.get_valid_repos(session) return primary_request @@ -197,10 +195,7 @@ def secondary_task_success_util_gen(repo_git, full_collection): secondary_enabled_phases.append(secondary_task_success_util_gen) - # Get secondary collection interval from config - tasks_config = get_section("Tasks") - secondary_interval = tasks_config.get('secondary_collection_interval', 10) - request = CollectionRequest("secondary",secondary_enabled_phases,max_repo=60, days_until_collect_again=secondary_interval) + request = CollectionRequest("secondary",secondary_enabled_phases,max_repo=60, days_until_collect_again=days_until_collect_again) request.get_valid_repos(session) return request @@ -222,10 +217,7 @@ def facade_task_update_weight_util_gen(repo_git, full_collection): facade_enabled_phases.append(facade_task_update_weight_util_gen) - # Get facade collection interval from config - tasks_config = get_section("Tasks") - facade_interval = tasks_config.get('facade_collection_interval', 10) - request = CollectionRequest("facade",facade_enabled_phases,max_repo=30, days_until_collect_again=facade_interval) + request = CollectionRequest("facade",facade_enabled_phases,max_repo=30, days_until_collect_again=days_until_collect_again) 
request.get_valid_repos(session) return request @@ -240,10 +232,7 @@ def ml_task_success_util_gen(repo_git, full_collection): ml_enabled_phases.append(ml_task_success_util_gen) - # Get ML collection interval from config - tasks_config = get_section("Tasks") - ml_interval = tasks_config.get('ml_collection_interval', 40) - request = CollectionRequest("ml",ml_enabled_phases,max_repo=5, days_until_collect_again=ml_interval) + request = CollectionRequest("ml",ml_enabled_phases,max_repo=5, days_until_collect_again=days_until_collect_again) request.get_valid_repos(session) return request @@ -264,19 +253,26 @@ def augur_collection_monitor(self): with DatabaseSession(logger, self.app.engine) as session: + # Get config values for collection intervals + config = AugurConfig(logger, session) + core_interval = config.get_value('Tasks', 'core_collection_interval') or 15 + secondary_interval = config.get_value('Tasks', 'secondary_collection_interval') or 10 + facade_interval = config.get_value('Tasks', 'facade_collection_interval') or 10 + ml_interval = config.get_value('Tasks', 'ml_collection_interval') or 40 + if primary_repo_collect_phase.__name__ in enabled_phase_names: - enabled_collection_hooks.append(build_primary_repo_collect_request(session, logger, enabled_phase_names)) + enabled_collection_hooks.append(build_primary_repo_collect_request(session, logger, enabled_phase_names, core_interval)) if secondary_repo_collect_phase.__name__ in enabled_phase_names: - enabled_collection_hooks.append(build_secondary_repo_collect_request(session, logger, enabled_phase_names)) + enabled_collection_hooks.append(build_secondary_repo_collect_request(session, logger, enabled_phase_names, secondary_interval)) #start_secondary_collection(session, max_repo=10) if facade_phase.__name__ in enabled_phase_names: #start_facade_collection(session, max_repo=30) - enabled_collection_hooks.append(build_facade_repo_collect_request(session, logger, enabled_phase_names)) + 
enabled_collection_hooks.append(build_facade_repo_collect_request(session, logger, enabled_phase_names, facade_interval)) if not RUNNING_DOCKER and machine_learning_phase.__name__ in enabled_phase_names: - enabled_collection_hooks.append(build_ml_repo_collect_request(session, logger, enabled_phase_names)) + enabled_collection_hooks.append(build_ml_repo_collect_request(session, logger, enabled_phase_names, ml_interval)) #start_ml_collection(session,max_repo=5) logger.info(f"Starting collection phases: {[h.name for h in enabled_collection_hooks]}") From 28045efee5b10e46b73b30ae31ff2305d755ebe4 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 31 Oct 2025 09:20:48 -0400 Subject: [PATCH 061/105] add unit values to the new variables --- augur/application/config.py | 8 ++++---- augur/tasks/start_tasks.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 89e8fc57f0..2c0ae82996 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -81,10 +81,10 @@ def get_development_flag(): }, "Tasks": { "collection_interval": 30, - "core_collection_interval": 15, - "secondary_collection_interval": 10, - "facade_collection_interval": 10, - "ml_collection_interval": 40 + "core_collection_interval_days": 15, + "secondary_collection_interval_days": 10, + "facade_collection_interval_days": 10, + "ml_collection_interval_days": 40 }, "Message_Insights": { "insight_days": 30, diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 4a46f84705..8c011df390 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -255,10 +255,10 @@ def augur_collection_monitor(self): # Get config values for collection intervals config = AugurConfig(logger, session) - core_interval = config.get_value('Tasks', 'core_collection_interval') or 15 - secondary_interval = config.get_value('Tasks', 'secondary_collection_interval') or 10 - facade_interval = 
config.get_value('Tasks', 'facade_collection_interval') or 10 - ml_interval = config.get_value('Tasks', 'ml_collection_interval') or 40 + core_interval = config.get_value('Tasks', 'core_collection_interval_days') or 15 + secondary_interval = config.get_value('Tasks', 'secondary_collection_interval_days') or 10 + facade_interval = config.get_value('Tasks', 'facade_collection_interval_days') or 10 + ml_interval = config.get_value('Tasks', 'ml_collection_interval_days') or 40 if primary_repo_collect_phase.__name__ in enabled_phase_names: enabled_collection_hooks.append(build_primary_repo_collect_request(session, logger, enabled_phase_names, core_interval)) From b99a1821d53d96150687240b42035468c9ffc558 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 29 Oct 2025 17:03:42 -0400 Subject: [PATCH 062/105] add note to the docs about the new config items --- docs/source/getting-started/collecting-data.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/getting-started/collecting-data.rst b/docs/source/getting-started/collecting-data.rst index ab7ce215c6..91d5f1ad2b 100644 --- a/docs/source/getting-started/collecting-data.rst +++ b/docs/source/getting-started/collecting-data.rst @@ -93,6 +93,9 @@ The celery monitor is responsible for generating the tasks that will tell the ot - ``refresh_materialized_views_interval_in_days``, number of days to wait between refreshes of materialized views. +If you choose, you can also adjust the values in the ``Tasks`` block if you would like to control when tasks should be re-run on a given repository. +This is specified as a number of days since the last successful run. 
+ Adding repos for collection ----------------------------- From 6ec1e4679b260e91404f4c825a8bbd0849734877 Mon Sep 17 00:00:00 2001 From: Tudor Gradinaru Date: Sat, 1 Nov 2025 19:22:21 +0200 Subject: [PATCH 063/105] Add guidance for joining CHAOSS Slack to CONTRIBUTING.md Fixes #3365 Signed-off-by: Tudor Gradinaru --- CONTRIBUTING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 99d09cae9d..2a2af0b9e1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -97,6 +97,7 @@ git push origin master ### CHAOSS - [Website](https://chaoss.community/) - [Get Involved](https://chaoss.community/participate) +- [Join the CHAOSS Slack](https://chaoss.community/kb-getting-started/) - Join the `#wg-augur-8knot` channel to participate in discussions, meetings, and planning - [Metrics](https://github.com/chaoss/metrics) - [Evolution Metrics Working Group](https://github.com/chaoss/wg-evolution) - [Common Metrics Working Group](https://github.com/chaoss/wg-common) From e445b860886f4c5e5d226bd58d872d0a6dbf4535 Mon Sep 17 00:00:00 2001 From: Kabir Panda Date: Fri, 31 Oct 2025 02:23:44 +0530 Subject: [PATCH 064/105] Fix: ensure redirect to user group view includes required group path param and validate inputs Signed-off-by: Kabir Panda --- augur/api/view/api.py | 49 ++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/augur/api/view/api.py b/augur/api/view/api.py index 21d182024f..eee99c93c4 100644 --- a/augur/api/view/api.py +++ b/augur/api/view/api.py @@ -1,11 +1,20 @@ -from flask import request, jsonify, redirect, url_for, flash, current_app +import logging import re + +from flask import flash, current_app, jsonify, redirect, request, url_for from flask_login import current_user, login_required + from augur.application.db.models import Repo, RepoGroup, UserGroup, UserRepo -from augur.tasks.frontend import add_github_orgs_and_repos, parse_org_and_repo_name, parse_org_name, add_gitlab_repos 
-from .utils import * -from ..server import app from augur.application.db.session import DatabaseSession +from augur.tasks.frontend import ( + add_github_orgs_and_repos, + add_gitlab_repos, + parse_org_and_repo_name, + parse_org_name +) + +from ..server import app +from .utils import * @app.route('/cache/file/') @app.route('/cache/file/') @@ -155,21 +164,37 @@ def user_remove_repo(): group = request.args.get("group_name") repo = request.args.get("repo_id") - if not repo: - flash("No repo id provided") - if not group: - flash("No group name provided") - - repo = int(repo) + + if not repo or not group: + if not repo: + flash("No repo id provided") + if not group: + flash("No group name provided") + # Staying on same page instead of redirecting to settings + return redirect(url_for("user_group_view", group=group)) + + try: + repo_id = int(repo) + except (TypeError, ValueError) as e: + flash("Invalid repo id provided") + + logging.error(f"Invalid repo id provided for repo '{repo}'. Error: {e}") + + + return redirect(url_for("user_group_view", group=group)) - result = current_user.remove_repo(group, repo)[0] + result = current_user.remove_repo(group, repo_id)[0] if result: flash(f"Successfully removed repo {repo} from group {group}") else: flash("An error occurred removing repo from group") - return redirect(url_for("user_group_view") + f"?group={group}") + + return redirect(url_for("user_group_view", group=group)) + + + @app.route('/account/application/deauthorize') @login_required From e2f41c9e704d066c7363ee2d4aeaf46c4bb9a493 Mon Sep 17 00:00:00 2001 From: Tudor Gradinaru Date: Sun, 2 Nov 2025 15:52:12 +0200 Subject: [PATCH 065/105] Add prominent Slack community section near the top of CONTRIBUTING.md Signed-off-by: Tudor Gradinaru --- CONTRIBUTING.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2a2af0b9e1..0b635edd43 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,6 +4,10 @@ We love to pull requests from 
everyone! We follow the standard Git workflow of ` If you are new to open source, we recommend GitHub's excellent guide on "[How to Contribute to Open Source](https://opensource.guide/how-to-contribute/)". In addition, please feel free to reach out to any of the maintainers or other community members if you are struggling as we are here to help you learn! +## Join the Community + +We encourage all contributors to join the [CHAOSS Slack workspace](https://chaoss.community/kb-getting-started/) and participate in the `#wg-augur-8knot` channel. This is a great place to ask questions, get help with issues, participate in discussions, and stay updated on community meetings and planning. Don't hesitate to introduce yourself and ask for help if you get stuck! + Before getting started, please make sure you've read the [README](README.md) to get a primer on our project. Augur's documentation can be found [here](https://oss-augur.readthedocs.io/en/main/). ## Opening an issue From 6496aadbc01a59d60a2e78f57c20581f79ba966f Mon Sep 17 00:00:00 2001 From: Tudor Gradinaru Date: Tue, 4 Nov 2025 09:35:26 +0200 Subject: [PATCH 066/105] Move 'Join the Community' section below docs introduction line Signed-off-by: Tudor Gradinaru --- CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0b635edd43..4df2a8dff2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,12 +4,12 @@ We love to pull requests from everyone! We follow the standard Git workflow of ` If you are new to open source, we recommend GitHub's excellent guide on "[How to Contribute to Open Source](https://opensource.guide/how-to-contribute/)". In addition, please feel free to reach out to any of the maintainers or other community members if you are struggling as we are here to help you learn! +Before getting started, please make sure you've read the [README](README.md) to get a primer on our project. 
Augur's documentation can be found [here](https://oss-augur.readthedocs.io/en/main/). + ## Join the Community We encourage all contributors to join the [CHAOSS Slack workspace](https://chaoss.community/kb-getting-started/) and participate in the `#wg-augur-8knot` channel. This is a great place to ask questions, get help with issues, participate in discussions, and stay updated on community meetings and planning. Don't hesitate to introduce yourself and ask for help if you get stuck! -Before getting started, please make sure you've read the [README](README.md) to get a primer on our project. Augur's documentation can be found [here](https://oss-augur.readthedocs.io/en/main/). - ## Opening an issue If you're experiencing an issue with Augur or have a question you'd like help answering, please feel free to open an [issue](https://github.com/chaoss/augur/issues). To help us prevent duplicates, we kindly ask that you briefly search for your problem or question in our [issues](https://github.com/chaoss/augur/issues) before opening a new one. 
From 7a8569fad004fd26be4a21d5d1706373cd4389de Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 16:44:51 -0400 Subject: [PATCH 067/105] rearrange dev deps (and unlock some) so the toml is valid Signed-off-by: Adrian Edwards --- pyproject.toml | 14 +++++++++----- uv.lock | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a3866d86e1..0d2870729f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,17 +95,21 @@ dependencies = [ [dependency-groups] dev = [ - "tox==3.24.4", - "pytest==6.2.5", - "toml>=0.10.2", - "ipdb==0.13.9", + { include-group = "lint" }, + { include-group = "test" }, + { include-group = "debug" }, + { include-group = "docs" }, +] +lint = [ + "pylint", "mypy>=1.18.2", "types-requests>=2.31.0.6", "types-pyyaml>=6.0.12.20250915", "types-python-dateutil>=2.9.0.20251008", "types-toml>=0.10.8.20240310", - { include-group = "docs" }, ] +test = ["tox", "pytest"] +debug = ["ipdb==0.13.9"] docs = [ "docutils==0.20.1", # setuptools is needed for pkg_resources due to sphinxcontrib-redoc diff --git a/uv.lock b/uv.lock index 18681966a7..972d34aebb 100644 --- a/uv.lock +++ b/uv.lock @@ -215,17 +215,20 @@ dependencies = [ ] [package.dev-dependencies] +debug = [ + { name = "ipdb" }, +] dev = [ { name = "docutils" }, { name = "ipdb" }, { name = "mypy" }, + { name = "pylint" }, { name = "pytest" }, { name = "setuptools" }, { name = "sphinx" }, { name = "sphinx-rtd-theme" }, { name = "sphinxcontrib-openapi" }, { name = "sphinxcontrib-redoc" }, - { name = "toml" }, { name = "tox" }, { name = "types-python-dateutil" }, { name = "types-pyyaml" }, @@ -240,6 +243,18 @@ docs = [ { name = "sphinxcontrib-openapi" }, { name = "sphinxcontrib-redoc" }, ] +lint = [ + { name = "mypy" }, + { name = "pylint" }, + { name = "types-python-dateutil" }, + { name = "types-pyyaml" }, + { name = "types-requests" }, + { name = "types-toml" }, +] +test = [ + { name = "pytest" }, + { 
name = "tox" }, +] [package.metadata] requires-dist = [ @@ -318,18 +333,19 @@ requires-dist = [ ] [package.metadata.requires-dev] +debug = [{ name = "ipdb", specifier = "==0.13.9" }] dev = [ { name = "docutils", specifier = "==0.20.1" }, { name = "ipdb", specifier = "==0.13.9" }, { name = "mypy", specifier = ">=1.18.2" }, - { name = "pytest", specifier = "==6.2.5" }, + { name = "pylint" }, + { name = "pytest" }, { name = "setuptools" }, { name = "sphinx", specifier = "==7.2.6" }, { name = "sphinx-rtd-theme", specifier = "==2.0.0" }, { name = "sphinxcontrib-openapi", specifier = "==0.8.3" }, { name = "sphinxcontrib-redoc", specifier = "==1.6.0" }, - { name = "toml", specifier = ">=0.10.2" }, - { name = "tox", specifier = "==3.24.4" }, + { name = "tox" }, { name = "types-python-dateutil", specifier = ">=2.9.0.20251008" }, { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, { name = "types-requests", specifier = ">=2.31.0.6" }, @@ -343,6 +359,18 @@ docs = [ { name = "sphinxcontrib-openapi", specifier = "==0.8.3" }, { name = "sphinxcontrib-redoc", specifier = "==1.6.0" }, ] +lint = [ + { name = "mypy", specifier = ">=1.18.2" }, + { name = "pylint" }, + { name = "types-python-dateutil", specifier = ">=2.9.0.20251008" }, + { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, + { name = "types-requests", specifier = ">=2.31.0.6" }, + { name = "types-toml", specifier = ">=0.10.8.20240310" }, +] +test = [ + { name = "pytest" }, + { name = "tox" }, +] [[package]] name = "babel" From 23adc0889225c0ed9963bf4fa25e9f0866df7f8b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 12:54:13 -0500 Subject: [PATCH 068/105] only test on current python Signed-off-by: Adrian Edwards --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index ceeb03155b..4fd797a36f 100644 --- a/tox.ini +++ b/tox.ini @@ -5,7 +5,7 @@ # and then run "tox" from this directory. 
[tox] -envlist = py{,36,37,38}-{metric-routes,application, workers} +envlist = py{,311}-{metric-routes,application, workers} skip_missing_interpreters = true [testenv] From bde4013e575d6fc5a579337140cd6031dcec11f0 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 13:00:13 -0500 Subject: [PATCH 069/105] embed tox ini in pyproject.toml Signed-off-by: Adrian Edwards --- pyproject.toml | 25 +++++++++++++++++++++++++ tox.ini | 27 --------------------------- 2 files changed, 25 insertions(+), 27 deletions(-) delete mode 100644 tox.ini diff --git a/pyproject.toml b/pyproject.toml index 0d2870729f..2a511219bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -159,6 +159,31 @@ constraint-dependencies = [ "graphql-server-core>1.1.1", ] +[tool.tox] +legacy_tox_ini = """ + [tox] + envlist = py{,311}-{metric-routes,application, workers} + skip_missing_interpreters = true + + [testenv] + passenv = AUGUR_* + whitelist_externals = make + /bin/bash + deps = + pytest + setenv = + AUGUR_LOG_DEBUG = 0 + AUGUR_LOG_QUIET = 1 + commands = + application: pytest tests --ignore=tests/test_routes --ignore=tests/test_workers + metric-routes: python tests/test_routes/runner.py + workers: pytest tests/test_workers/ + worker-persistance: pytest test/test_workers/worker_persistance/ + + [pytest] + addopts = -ra -s +""" + [tool.mypy] files = ['augur/application/db/*.py'] ignore_missing_imports = true diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 4fd797a36f..0000000000 --- a/tox.ini +++ /dev/null @@ -1,27 +0,0 @@ -#SPDX-License-Identifier: MIT -# tox (https://tox.readthedocs.io/) is a tool for running tests -# in multiple virtualenvs. This configuration file will run the -# test suite on all supported python versions. To use it, "pip install tox" -# and then run "tox" from this directory. 
- -[tox] -envlist = py{,311}-{metric-routes,application, workers} -skip_missing_interpreters = true - -[testenv] -passenv = AUGUR_* -whitelist_externals = make - /bin/bash -deps = - pytest -setenv = - AUGUR_LOG_DEBUG = 0 - AUGUR_LOG_QUIET = 1 -commands = - application: pytest tests --ignore=tests/test_routes --ignore=tests/test_workers - metric-routes: python tests/test_routes/runner.py - workers: pytest tests/test_workers/ - worker-persistance: pytest test/test_workers/worker_persistance/ - -[pytest] -addopts = -ra -s From 11d23321dc7dfa3f2be5cc9f9ef453b54ab81bce Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 13:00:29 -0500 Subject: [PATCH 070/105] use tox4 Signed-off-by: Adrian Edwards --- pyproject.toml | 2 +- uv.lock | 216 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 192 insertions(+), 26 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2a511219bb..f89f1b7cfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,7 +108,7 @@ lint = [ "types-python-dateutil>=2.9.0.20251008", "types-toml>=0.10.8.20240310", ] -test = ["tox", "pytest"] +test = ["tox>=4", "pytest"] debug = ["ipdb==0.13.9"] docs = [ "docutils==0.20.1", diff --git a/uv.lock b/uv.lock index 972d34aebb..fa2b1eba28 100644 --- a/uv.lock +++ b/uv.lock @@ -229,7 +229,8 @@ dev = [ { name = "sphinx-rtd-theme" }, { name = "sphinxcontrib-openapi" }, { name = "sphinxcontrib-redoc" }, - { name = "tox" }, + { name = "tox", version = "4.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "tox", version = "4.32.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "types-python-dateutil" }, { name = "types-pyyaml" }, { name = "types-requests" }, @@ -253,7 +254,8 @@ lint = [ ] test = [ { name = "pytest" }, - { name = "tox" }, + { name = "tox", version = "4.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < 
'3.11'" }, + { name = "tox", version = "4.32.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] [package.metadata] @@ -345,7 +347,7 @@ dev = [ { name = "sphinx-rtd-theme", specifier = "==2.0.0" }, { name = "sphinxcontrib-openapi", specifier = "==0.8.3" }, { name = "sphinxcontrib-redoc", specifier = "==1.6.0" }, - { name = "tox" }, + { name = "tox", specifier = ">=4" }, { name = "types-python-dateutil", specifier = ">=2.9.0.20251008" }, { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, { name = "types-requests", specifier = ">=2.31.0.6" }, @@ -369,7 +371,7 @@ lint = [ ] test = [ { name = "pytest" }, - { name = "tox" }, + { name = "tox", specifier = ">=4" }, ] [[package]] @@ -472,11 +474,27 @@ sdist = { url = "https://files.pythonhosted.org/packages/10/ed/7e8b97591f6f45617 name = "cachetools" version = "5.5.2" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload-time = "2025-02-20T21:01:19.524Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" }, ] +[[package]] +name = "cachetools" +version = "6.2.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/7e/b975b5814bd36faf009faebe22c1072a1fa1168db34d285ef0ba071ad78c/cachetools-6.2.1.tar.gz", hash = 
"sha256:3f391e4bd8f8bf0931169baf7456cc822705f4e2a31f840d218f445b9a854201", size = 31325, upload-time = "2025-10-12T14:55:30.139Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/c5/1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl", hash = "sha256:09868944b6dde876dfd44e1d47e18484541eaf12f26f29b7af91b26cc892d701", size = 11280, upload-time = "2025-10-12T14:55:28.382Z" }, +] + [[package]] name = "celery" version = "5.5.3" @@ -505,6 +523,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/ae/320161bd181fc06471eed047ecce67b693fd7515b16d495d8932db763426/certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057", size = 157650, upload-time = "2025-06-15T02:45:49.977Z" }, ] +[[package]] +name = "chardet" +version = "5.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.2" @@ -888,11 +915,27 @@ wheels = [ name = "filelock" version = "3.18.0" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, 
upload-time = "2025-03-14T07:11:40.47Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" }, ] +[[package]] +name = "filelock" +version = "3.20.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4", size = 18922, upload-time = "2025-10-08T18:03:50.056Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 16054, upload-time = "2025-10-08T18:03:48.35Z" }, +] + [[package]] name = "flask" version = "2.0.2" @@ -1073,22 +1116,44 @@ wheels = [ name = "google-auth" version = "2.40.3" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] dependencies = [ - { name = "cachetools" }, - { name = "pyasn1-modules" }, - { name = "rsa" }, + { name = "cachetools", version = "5.5.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "pyasn1-modules", marker = "python_full_version < '3.11'" }, + { name = "rsa", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/9e/9b/e92ef23b84fa10a64ce4831390b7a4c2e53c0132568d99d4ae61d04c8855/google_auth-2.40.3.tar.gz", hash = "sha256:500c3a29adedeb36ea9cf24b8d10858e152f2412e3ca37829b3fa18e33d63b77", size = 281029, 
upload-time = "2025-06-04T18:04:57.577Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/17/63/b19553b658a1692443c62bd07e5868adaa0ad746a0751ba62c59568cd45b/google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca", size = 216137, upload-time = "2025-06-04T18:04:55.573Z" }, ] +[[package]] +name = "google-auth" +version = "2.42.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "cachetools", version = "6.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pyasn1-modules", marker = "python_full_version >= '3.11'" }, + { name = "rsa", marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/6b/22a77135757c3a7854c9f008ffed6bf4e8851616d77faf13147e9ab5aae6/google_auth-2.42.1.tar.gz", hash = "sha256:30178b7a21aa50bffbdc1ffcb34ff770a2f65c712170ecd5446c4bef4dc2b94e", size = 295541, upload-time = "2025-10-30T16:42:19.381Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/05/adeb6c495aec4f9d93f9e2fc29eeef6e14d452bba11d15bdb874ce1d5b10/google_auth-2.42.1-py2.py3-none-any.whl", hash = "sha256:eb73d71c91fc95dbd221a2eb87477c278a355e7367a35c0d84e6b0e5f9b4ad11", size = 222550, upload-time = "2025-10-30T16:42:17.878Z" }, +] + [[package]] name = "google-auth-oauthlib" version = "1.2.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "google-auth" }, + { name = "google-auth", version = "2.40.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "google-auth", version = "2.42.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "requests-oauthlib" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = "sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955, upload-time = "2025-04-22T16:40:29.172Z" } @@ -2371,11 +2436,27 @@ wheels = [ name = "platformdirs" version = "4.3.8" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362, upload-time = "2025-05-07T22:47:42.121Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, ] +[[package]] +name = "platformdirs" +version = "4.5.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/61/33/9611380c2bdb1225fdef633e2a9610622310fed35ab11dac9620972ee088/platformdirs-4.5.0.tar.gz", hash = "sha256:70ddccdd7c99fc5942e9fc25636a8b34d04c24b335100223152c2803e4063312", size = 21632, upload-time = "2025-10-08T17:44:48.791Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/cb/ac7874b3e5d58441674fb70742e6c374b28b0c7cb988d37d991cde47166c/platformdirs-4.5.0-py3-none-any.whl", hash = "sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3", size = 18651, upload-time = "2025-10-08T17:44:47.223Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -2545,7 +2626,8 @@ dependencies = [ { name = "dill" }, { name = "isort" 
}, { name = "mccabe" }, - { name = "platformdirs" }, + { name = "platformdirs", version = "4.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "platformdirs", version = "4.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "tomlkit" }, ] @@ -2578,6 +2660,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload-time = "2025-03-25T05:01:24.908Z" }, ] +[[package]] +name = "pyproject-api" +version = "1.9.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "packaging", marker = "python_full_version < '3.11'" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/fd/437901c891f58a7b9096511750247535e891d2d5a5a6eefbc9386a2b41d5/pyproject_api-1.9.1.tar.gz", hash = "sha256:43c9918f49daab37e302038fc1aed54a8c7a91a9fa935d00b9a485f37e0f5335", size = 22710, upload-time = "2025-05-12T14:41:58.025Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/e6/c293c06695d4a3ab0260ef124a74ebadba5f4c511ce3a4259e976902c00b/pyproject_api-1.9.1-py3-none-any.whl", hash = "sha256:7d6238d92f8962773dd75b5f0c4a6a27cce092a14b623b811dba656f3b628948", size = 13158, upload-time = "2025-05-12T14:41:56.217Z" }, +] + +[[package]] +name = "pyproject-api" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "packaging", marker = "python_full_version >= '3.11'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/45/7b/c0e1333b61d41c69e59e5366e727b18c4992688caf0de1be10b3e5265f6b/pyproject_api-1.10.0.tar.gz", hash = "sha256:40c6f2d82eebdc4afee61c773ed208c04c19db4c4a60d97f8d7be3ebc0bbb330", size = 22785, upload-time = "2025-10-09T19:12:27.21Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/cc/cecf97be298bee2b2a37dd360618c819a2a7fd95251d8e480c1f0eb88f3b/pyproject_api-1.10.0-py3-none-any.whl", hash = "sha256:8757c41a79c0f4ab71b99abed52b97ecf66bd20b04fa59da43b5840bac105a09", size = 13218, upload-time = "2025-10-09T19:12:24.428Z" }, +] + [[package]] name = "pyreadline3" version = "3.5.4" @@ -3535,7 +3649,8 @@ version = "2.15.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "absl-py" }, - { name = "google-auth" }, + { name = "google-auth", version = "2.40.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "google-auth", version = "2.42.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "google-auth-oauthlib" }, { name = "grpcio" }, { name = "markdown" }, @@ -3656,7 +3771,8 @@ version = "3.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py", extra = ["linkify", "plugins"] }, - { name = "platformdirs" }, + { name = "platformdirs", version = "4.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "platformdirs", version = "4.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "rich" }, { name = "typing-extensions" }, ] @@ -3791,21 +3907,50 @@ wheels = [ [[package]] name = "tox" -version = "3.24.4" +version = "4.20.0" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = 
"filelock" }, - { name = "packaging" }, - { name = "pluggy" }, - { name = "py" }, - { name = "six" }, - { name = "toml" }, - { name = "virtualenv" }, + { name = "cachetools", version = "5.5.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "chardet", marker = "python_full_version < '3.11'" }, + { name = "colorama", marker = "python_full_version < '3.11'" }, + { name = "filelock", version = "3.18.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "packaging", marker = "python_full_version < '3.11'" }, + { name = "platformdirs", version = "4.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "pluggy", marker = "python_full_version < '3.11'" }, + { name = "pyproject-api", version = "1.9.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "virtualenv", version = "20.31.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d2/78/ad720ade1c6c5b24e407856fb8fc578896ed8e2a603832bb85be3825b551/tox-3.24.4.tar.gz", hash = "sha256:c30b57fa2477f1fb7c36aa1d83292d5c2336cd0018119e1b1c17340e2c2708ca", size = 316762, upload-time = "2021-09-16T09:45:00.904Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/4a/55f9dba99aad874ae54a7fb2310c940e978fd0155eb3576ddebec000fca7/tox-4.20.0.tar.gz", hash = "sha256:5b78a49b6eaaeab3ae4186415e7c97d524f762ae967c63562687c3e5f0ec23d5", size = 181364, upload-time = "2024-09-19T03:46:15.252Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/78/b0/ce98616ec9c3f270495a2493cde4d81b1f499057222ae77a8103aea59777/tox-3.24.4-py2.py3-none-any.whl", hash = "sha256:5e274227a53dc9ef856767c21867377ba395992549f02ce55eb549f9fb9a8d10", size = 
85645, upload-time = "2021-09-16T09:44:58.664Z" }, + { url = "https://files.pythonhosted.org/packages/cf/ee/6f9bf37f197578f98fb450f1aeebf4570f85b24b00d846bbde6e11489bd1/tox-4.20.0-py3-none-any.whl", hash = "sha256:21a8005e3d3fe5658a8e36b8ca3ed13a4230429063c5cc2a2fdac6ee5aa0de34", size = 157087, upload-time = "2024-09-19T03:46:12.754Z" }, +] + +[[package]] +name = "tox" +version = "4.32.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "cachetools", version = "6.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "chardet", marker = "python_full_version >= '3.11'" }, + { name = "colorama", marker = "python_full_version >= '3.11'" }, + { name = "filelock", version = "3.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "packaging", marker = "python_full_version >= '3.11'" }, + { name = "platformdirs", version = "4.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pluggy", marker = "python_full_version >= '3.11'" }, + { name = "pyproject-api", version = "1.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "virtualenv", version = "20.35.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/59/bf/0e4dbd42724cbae25959f0e34c95d0c730df03ab03f54d52accd9abfc614/tox-4.32.0.tar.gz", hash = "sha256:1ad476b5f4d3679455b89a992849ffc3367560bbc7e9495ee8a3963542e7c8ff", size = 203330, upload-time = "2025-10-24T18:03:38.132Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/cc/e09c0d663a004945f82beecd4f147053567910479314e8d01ba71e5d5dea/tox-4.32.0-py3-none-any.whl", hash = 
"sha256:451e81dc02ba8d1ed20efd52ee409641ae4b5d5830e008af10fe8823ef1bd551", size = 175905, upload-time = "2025-10-24T18:03:36.337Z" }, ] [[package]] @@ -3926,16 +4071,37 @@ wheels = [ name = "virtualenv" version = "20.31.2" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] dependencies = [ - { name = "distlib" }, - { name = "filelock" }, - { name = "platformdirs" }, + { name = "distlib", marker = "python_full_version < '3.11'" }, + { name = "filelock", version = "3.18.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "platformdirs", version = "4.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316, upload-time = "2025-05-08T17:58:23.811Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982, upload-time = "2025-05-08T17:58:21.15Z" }, ] +[[package]] +name = "virtualenv" +version = "20.35.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "distlib", marker = "python_full_version >= '3.11'" }, + { name = "filelock", version = "3.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "platformdirs", version = "4.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/20/28/e6f1a6f655d620846bd9df527390ecc26b3805a0c5989048c210e22c5ca9/virtualenv-20.35.4.tar.gz", hash = "sha256:643d3914d73d3eeb0c552cbb12d7e82adf0e504dbf86a3182f8771a153a1971c", size = 6028799, upload-time = "2025-10-29T06:57:40.511Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/0c/c05523fa3181fdf0c9c52a6ba91a23fbf3246cc095f26f6516f9c60e6771/virtualenv-20.35.4-py3-none-any.whl", hash = "sha256:c21c9cede36c9753eeade68ba7d523529f228a403463376cf821eaae2b650f1b", size = 6005095, upload-time = "2025-10-29T06:57:37.598Z" }, +] + [[package]] name = "wcwidth" version = "0.2.13" From e614b745bda82272b793c693a5c352d49f068410 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 17:07:30 -0400 Subject: [PATCH 071/105] use tox4 with uv plugin Signed-off-by: Adrian Edwards --- pyproject.toml | 3 ++- uv.lock | 71 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f89f1b7cfa..bdc7f87fde 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,7 +108,7 @@ lint = [ "types-python-dateutil>=2.9.0.20251008", "types-toml>=0.10.8.20240310", ] -test = ["tox>=4", "pytest"] +test = ["tox>=4.0", "tox-uv", "pytest"] debug = ["ipdb==0.13.9"] docs = [ "docutils==0.20.1", @@ -164,6 +164,7 @@ legacy_tox_ini = """ [tox] envlist = py{,311}-{metric-routes,application, workers} skip_missing_interpreters = true + requires = tox-uv [testenv] passenv = AUGUR_* diff --git a/uv.lock b/uv.lock index fa2b1eba28..37df99ba32 100644 --- a/uv.lock +++ b/uv.lock @@ -231,6 +231,8 @@ dev = [ { name = "sphinxcontrib-redoc" }, { name = "tox", version = "4.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "tox", version = "4.32.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tox-uv", version = "1.13.1", source = { 
registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "tox-uv", version = "1.29.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "types-python-dateutil" }, { name = "types-pyyaml" }, { name = "types-requests" }, @@ -256,6 +258,8 @@ test = [ { name = "pytest" }, { name = "tox", version = "4.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "tox", version = "4.32.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tox-uv", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "tox-uv", version = "1.29.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] [package.metadata] @@ -347,7 +351,8 @@ dev = [ { name = "sphinx-rtd-theme", specifier = "==2.0.0" }, { name = "sphinxcontrib-openapi", specifier = "==0.8.3" }, { name = "sphinxcontrib-redoc", specifier = "==1.6.0" }, - { name = "tox", specifier = ">=4" }, + { name = "tox", specifier = ">=4.0" }, + { name = "tox-uv" }, { name = "types-python-dateutil", specifier = ">=2.9.0.20251008" }, { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, { name = "types-requests", specifier = ">=2.31.0.6" }, @@ -371,7 +376,8 @@ lint = [ ] test = [ { name = "pytest" }, - { name = "tox", specifier = ">=4" }, + { name = "tox", specifier = ">=4.0" }, + { name = "tox-uv" }, ] [[package]] @@ -3953,6 +3959,41 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/cc/e09c0d663a004945f82beecd4f147053567910479314e8d01ba71e5d5dea/tox-4.32.0-py3-none-any.whl", hash = "sha256:451e81dc02ba8d1ed20efd52ee409641ae4b5d5830e008af10fe8823ef1bd551", size = 175905, upload-time = "2025-10-24T18:03:36.337Z" }, ] +[[package]] +name = "tox-uv" +version = "1.13.1" +source = { registry = "https://pypi.org/simple" } 
+resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "packaging", marker = "python_full_version < '3.11'" }, + { name = "tox", version = "4.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "uv", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/93/1f06c3cbfd4c1aa23859d49a76c7e65b51e60715bc22b2dd16cbff9c1e71/tox_uv-1.13.1.tar.gz", hash = "sha256:a8504b8db4bf6c81cba7cd3518851a3f1e0f6991d22272a4cc08ebe1b7f38cca", size = 15645, upload-time = "2024-10-11T16:14:57.301Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/8e/94afb25547f5e4987801e8f6aa11e357190f72f31eb363267a3cb2fa6a88/tox_uv-1.13.1-py3-none-any.whl", hash = "sha256:b163dd28ca37a9f4c6d8cbac11153be27c2e929b58bcae62e323ffa8f71c327d", size = 13383, upload-time = "2024-10-11T16:14:55.885Z" }, +] + +[[package]] +name = "tox-uv" +version = "1.29.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "packaging", marker = "python_full_version >= '3.11'" }, + { name = "tox", version = "4.32.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "uv", marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4f/90/06752775b8cfadba8856190f5beae9f552547e0f287e0246677972107375/tox_uv-1.29.0.tar.gz", hash = "sha256:30fa9e6ad507df49d3c6a2f88894256bcf90f18e240a00764da6ecab1db24895", size = 23427, upload-time = "2025-10-09T20:40:27.384Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/17/221d62937c4130b044bb437caac4181e7e13d5536bbede65264db1f0ac9f/tox_uv-1.29.0-py3-none-any.whl", hash = "sha256:b1d251286edeeb4bc4af1e24c8acfdd9404700143c2199ccdbb4ea195f7de6cc", size = 17254, upload-time = 
"2025-10-09T20:40:25.885Z" }, +] + [[package]] name = "tqdm" version = "4.67.1" @@ -4058,6 +4099,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/cf/8435d5a7159e2a9c83a95896ed596f68cf798005fe107cc655b5c5c14704/urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e", size = 144225, upload-time = "2024-08-29T15:43:08.921Z" }, ] +[[package]] +name = "uv" +version = "0.9.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/f6/9914f57d152cfcb85f3a26f8fbac3c88e4eb9cbe88639076241e16819334/uv-0.9.7.tar.gz", hash = "sha256:555ee72146b8782c73d755e4a21c9885c6bfc81db0ffca2220d52dddae007eb7", size = 3705596, upload-time = "2025-10-30T22:17:18.652Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/38/cee64a9dcefd46f83a922c4e31d9cd9d91ce0d27a594192f7df677151eb4/uv-0.9.7-py3-none-linux_armv6l.whl", hash = "sha256:134e0daac56f9e399ccdfc9e4635bc0a13c234cad9224994c67bae462e07399a", size = 20614967, upload-time = "2025-10-30T22:16:31.274Z" }, + { url = "https://files.pythonhosted.org/packages/6f/b7/1b1ff8dfde05e9d27abf29ebf22da48428fe1e16f0b4d65a839bd2211303/uv-0.9.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:1aaf79b4234400e9e2fbf5b50b091726ccbb0b6d4d032edd3dfd4c9673d89dca", size = 19692886, upload-time = "2025-10-30T22:16:35.893Z" }, + { url = "https://files.pythonhosted.org/packages/f5/7d/b618174d8a8216af350398ace03805b2b2df6267b1745abf45556c2fda58/uv-0.9.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0fdbfad5b367e7a3968264af6da5bbfffd4944a90319042f166e8df1a2d9de09", size = 18345022, upload-time = "2025-10-30T22:16:38.45Z" }, + { url = "https://files.pythonhosted.org/packages/13/4c/03fafb7d28289d54ac7a34507f1e97e527971f8b0ee2c5e957045966a1a6/uv-0.9.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:635e82c2d0d8b001618af82e4f2724350f15814f6462a71b3ebd44adec21f03c", 
size = 20170427, upload-time = "2025-10-30T22:16:41.099Z" }, + { url = "https://files.pythonhosted.org/packages/35/0e/f1316da150453755bb88cf4232e8934de71a0091eb274a8b69d948535453/uv-0.9.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:56a440ccde7624a7bc070e1c2492b358c67aea9b8f17bc243ea27c5871c8d02c", size = 20234277, upload-time = "2025-10-30T22:16:43.521Z" }, + { url = "https://files.pythonhosted.org/packages/37/b8/cb62cd78151b235c5da9290f0e3fb032b36706f2922208a691678aa0f2df/uv-0.9.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b5f1fb8203a77853db176000e8f30d5815ab175dc46199db059f97a72fc51110", size = 21180078, upload-time = "2025-10-30T22:16:45.857Z" }, + { url = "https://files.pythonhosted.org/packages/be/e5/6107249d23f06fa1739496e89699e76169037b4643144b28b324efc3075d/uv-0.9.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:bb8bfcc2897f7653522abc2cae80233af756ad857bfbbbbe176f79460cbba417", size = 22743896, upload-time = "2025-10-30T22:16:48.487Z" }, + { url = "https://files.pythonhosted.org/packages/df/94/69d8e0bb29c140305e7677bc8c98c765468a55cb10966e77bb8c69bf815d/uv-0.9.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89697fa0d7384ba047daf75df844ee7800235105e41d08e0c876861a2b4aa90e", size = 22361126, upload-time = "2025-10-30T22:16:51.366Z" }, + { url = "https://files.pythonhosted.org/packages/c0/0d/d186456cd0d7972ed026e5977b8a12e1f94c923fc3d6e86c7826c6f0d1fe/uv-0.9.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c9810ee8173dce129c49b338d5e97f3d7c7e9435f73e0b9b26c2f37743d3bb9e", size = 21477489, upload-time = "2025-10-30T22:16:53.757Z" }, + { url = "https://files.pythonhosted.org/packages/c7/59/61d8e9f1734069049abe9e593961de602397c7194712346906c075fec65f/uv-0.9.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cf6bc2482d1293cc630f66b862b494c09acda9b7faff7307ef52667a2b3ad49", size = 21382006, upload-time = 
"2025-10-30T22:16:56.117Z" }, + { url = "https://files.pythonhosted.org/packages/74/ac/090dbde63abb56001190392d29ca2aa654eebc146a693b5dda68da0df2fb/uv-0.9.7-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:7019f4416925f4091b9d28c1cf3e8444cf910c4ede76bdf1f6b9a56ca5f97985", size = 20255103, upload-time = "2025-10-30T22:16:58.434Z" }, + { url = "https://files.pythonhosted.org/packages/56/e7/ca2d99a4ce86366731547a84b5a2c946528b8d6d28c74ac659c925955a0c/uv-0.9.7-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:edd768f6730bba06aa10fdbd80ee064569f7236806f636bf65b68136a430aad0", size = 21311768, upload-time = "2025-10-30T22:17:01.259Z" }, + { url = "https://files.pythonhosted.org/packages/d8/1a/c5d9e57f52aa30bfee703e6b9e5b5072102cfc706f3444377bb0de79eac7/uv-0.9.7-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:d6e5fe28ca05a4b576c0e8da5f69251dc187a67054829cfc4afb2bfa1767114b", size = 20239129, upload-time = "2025-10-30T22:17:03.815Z" }, + { url = "https://files.pythonhosted.org/packages/aa/ab/16110ca6b1c4aaad79b4f2c6bc102c416a906e5d29947d0dc774f6ef4365/uv-0.9.7-py3-none-musllinux_1_1_i686.whl", hash = "sha256:34fe0af83fcafb9e2b786f4bd633a06c878d548a7c479594ffb5607db8778471", size = 20647326, upload-time = "2025-10-30T22:17:06.33Z" }, + { url = "https://files.pythonhosted.org/packages/89/a9/2a8129c796831279cc0c53ffdd19dd6133d514805e52b1ef8a2aa0ff8912/uv-0.9.7-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:777bb1de174319245a35e4f805d3b4484d006ebedae71d3546f95e7c28a5f436", size = 21604958, upload-time = "2025-10-30T22:17:09.046Z" }, + { url = "https://files.pythonhosted.org/packages/73/97/616650cb4dd5fbaabf8237469e1bc84710ae878095d359999982e1bc8ecf/uv-0.9.7-py3-none-win32.whl", hash = "sha256:bcf878528bd079fe8ae15928b5dfa232fac8b0e1854a2102da6ae1a833c31276", size = 19418913, upload-time = "2025-10-30T22:17:11.384Z" }, + { url = 
"https://files.pythonhosted.org/packages/de/7f/e3cdaffac70852f5ff933b04c7b8a06c0f91f41e563f04b689caa65b71bd/uv-0.9.7-py3-none-win_amd64.whl", hash = "sha256:62b315f62669899076a1953fba6baf50bd2b57f66f656280491331dcedd7e6c6", size = 21443513, upload-time = "2025-10-30T22:17:13.785Z" }, + { url = "https://files.pythonhosted.org/packages/89/79/8278452acae2fe96829485d32e1a2363829c9e42674704562ffcfc06b140/uv-0.9.7-py3-none-win_arm64.whl", hash = "sha256:d13da6521d4e841b1e0a9fda82e793dcf8458a323a9e8955f50903479d0bfa97", size = 19946729, upload-time = "2025-10-30T22:17:16.669Z" }, +] + [[package]] name = "vine" version = "5.1.0" From f43aa1c48082b75609c5fb640779b531d038dab7 Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Thu, 6 Nov 2025 22:43:30 +0530 Subject: [PATCH 072/105] Fix: Added Direct and hyperlinked image Signed-off-by: PredictiveManish --- docs/source/getting-started/Welcome.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/source/getting-started/Welcome.rst b/docs/source/getting-started/Welcome.rst index aaec004651..2808176bb5 100644 --- a/docs/source/getting-started/Welcome.rst +++ b/docs/source/getting-started/Welcome.rst @@ -7,12 +7,15 @@ Now, as a new member, it can be overwhelming to navigate and sift through all th A few first things to do: -1. Join the slack channel, that's the fastest way to join and be a part of the community. +1. Join the slack channel, that's the fastest way to join and be a part of the community. +You can join using this invite link: +`Join the CHAOSS Slack Workspace `_ .. image:: images/slack.jpg :width: 400 :alt: "Slack logo" - + :target: https://join.slack.com/t/chaoss-workspace/shared_invite/zt-3hmaf3urr-boLCd7nRgcAvvfbWcqJJVw + 2. Introduce yourself to the #newcomers channel. Say hi, it'd help others get to know you and point you in the right direction. 
In case you're unsure, here's a format you can use: * Name From 28137e3f2d1e7d4df4de924ce7c697a1cbe1363e Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 3 Nov 2025 16:26:05 -0500 Subject: [PATCH 073/105] add pagination offset to config dict per TODO comment Signed-off-by: Adrian Edwards --- augur/application/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index 3fca15b25a..f2ba496b31 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -110,6 +110,9 @@ def get_development_flag(): "secondary_repo_collect_phase": 1, "facade_phase": 1, "machine_learning_phase": 0 + }, + "Frontend": { + "pagination_offset": 25 } } From 77a8012ae0889a8c8e51ffd6c9786e9c0c87542d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 3 Nov 2025 17:14:47 -0500 Subject: [PATCH 074/105] create a ConfigStore class to act as an interface for many possible config backends this includes an Exception class for attempts to write to non-writeable configs Signed-off-by: Adrian Edwards --- augur/application/config.py | 157 ++++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index f2ba496b31..f6735dee68 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -368,3 +368,160 @@ def remove_section(self, section_name: str) -> None: def create_default_config(self) -> None: """Create default config in the database.""" self.load_config_from_dict(self.default_config) + + +class NotWriteableException(Exception): + """Custom Augur exception class to be used when trying to modify a config that is not writeable + """ + pass + +class ConfigStore(): + """A class representing the interface for various possible config backends. 
+ This should not contain implementations unless they apply to all possible config backends + """ + + @property + def writable(self): + """Determine if this config store is writable. + + Returns: + True if the config store is writable, and False if it is not + """ + raise NotImplementedError() + + @property + def empty(self): + """Determine if this config store is empty. + + Returns: + True if the config store is empty, and False if it is not + """ + raise NotImplementedError() + + def load_dict(self, data: dict, ignore_existing=False): + """Load config into this store from dict values + + Args: + data (dict): the data to load + ignore_existing (bool, optional): whether to ignore any values or sections that exist already. Defaults to False. + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. + """ + raise NotImplementedError() + + def retrieve_dict(self): + """Get the full config from this store as a dictionary. + + Returns: + dict: The dict representation of the config from this config store + """ + raise NotImplementedError() + + def clear(self): + """Remove all values from this config store. + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. + """ + raise NotImplementedError() + + def remove_section(self, section_name: str) -> None: + """Remove a section from the config. + + Args: + section_name: The name of the section being deleted + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. + """ + raise NotImplementedError() + + def has_section(self, section_name: str) -> bool: + """Determine if a section exists in this config. 
+ + Args: + section_name: The name of the section to check for + + Returns: + True if the config store contains this section, and False if it is not + """ + raise NotImplementedError() + + def create_section(self, section_name: str, values: Optional[dict] = None, ignore_existing=False) -> None: + """Create a section in this config. + + Args: + section_name: The name of the section being deleted + values (Optional[dict], optional): Optional keys and values to populate in this section. Defaults to None. + ignore_existing (bool, optional): whether to ignore and overwrite an existing section or value with this name. Defaults to False. + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. + """ + raise NotImplementedError() + + def get_section(self, section_name: str) -> dict: + """Return a section from this config store. + + Args: + section_name: The name of the section to check for + + Returns: + The section data as a dict + """ + raise NotImplementedError() + + def remove_value(self, section_name: str, value_key: str) -> None: + """Remove a value from the config. + + Args: + section_name: The name of the section the value is in + value_name: The key of the value being deleted + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. + """ + raise NotImplementedError() + + def has_value(self, section_name: str, value_key: str) -> bool: + """Determine if a section exists in this config. + + Args: + section_name: The name of the section the value is in + value_key: The key at which to look for a value + + Returns: + True if the config store contains this value, and False if not + """ + raise NotImplementedError() + + def add_value(self, section_name: str, value_key: str, value, ignore_existing=False) -> None: + """Create a section in this config. 
+ + Args: + section_name: The name of the section being deleted + value_key (str): The key at which to store this value + value (any): the value to store at this key + ignore_existing (bool, optional): whether to ignore and overwrite an existing value if encountered. Defaults to False. + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. + """ + raise NotImplementedError() + + def get_value(self, section_name: str, value_key: str): + """Return a single value from this config store. + + Args: + section_name: The name of the section to check for + value_key (str): The key at which to look for a value + + Returns: + The section data as a dict + """ + raise NotImplementedError() + + + + From ab23de412aa066d6339dbd23b3206b98cdd58064 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 3 Nov 2025 17:29:31 -0500 Subject: [PATCH 075/105] implement the config store for json data Signed-off-by: Adrian Edwards --- augur/application/config.py | 89 +++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index f6735dee68..b22d3cc252 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -525,3 +525,92 @@ def get_value(self, section_name: str, value_key: str): +class JsonConfig(ConfigStore): + """A ConfigStore for handling JSON data + """ + + def __init__(self, json_data): + self.json_data = json_data + + @property + def writable(self): + return False + + @property + def empty(self): + return self.json_data == {} + + def load_dict(self, data: dict, ignore_existing=False): + if not self.writable: + raise NotWriteableException() + + if ignore_existing: + self.json_data = data + else: + self.json_data.update(data) + + def retrieve_dict(self): + return self.json_data + + def clear(self): + if not self.writable: + raise NotWriteableException() + + self.json_data = {} + + def remove_section(self, section_name: str) -> None: + if not 
self.writable: + raise NotWriteableException() + + del self.json_data[section_name] + + + def has_section(self, section_name: str) -> bool: + return section_name in self.json_data + + def create_section(self, section_name: str, values: Optional[dict] = None, ignore_existing=False) -> None: + if not self.writable: + raise NotWriteableException() + + if values is None: + values = {} + + if ignore_existing: + self.json_data[section_name] = values + else: + self.json_data[section_name].update(values) + + def get_section(self, section_name: str) -> dict: + if self.has_section(section_name): + return self.json_data[section_name] + + def remove_value(self, section_name: str, value_key: str) -> None: + if not self.writable: + raise NotWriteableException() + + if self.has_section(section_name): + del self.json_data[section_name][value_key] + + + def has_value(self, section_name: str, value_key: str) -> bool: + return self.has_section(section_name) and self.json_data[section_name].get(value_key, None) is not None + + def add_value(self, section_name: str, value_key: str, value, ignore_existing=False) -> None: + if not self.writable: + raise NotWriteableException() + + if not self.has_section(section_name): + self.create_section(section_name, {[value_key]: value}, ignore_existing=ignore_existing) + return + + if ignore_existing: + self.json_data[section_name][value_key] = value + else: + self.json_data[section_name][value_key].update(value) + + + def get_value(self, section_name: str, value_key: str): + if not self.has_section(section_name): + return None + + return self.json_data[section_name].get(value_key, None) From 3d98bfa6f84375017aa34f5c7ea7abc43d56c95c Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 13:27:42 -0500 Subject: [PATCH 076/105] add test case for some jsonconfig stuff Signed-off-by: Adrian Edwards --- tests/test_classes/test_config_stores.py | 54 ++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 
tests/test_classes/test_config_stores.py diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py new file mode 100644 index 0000000000..7789a12b84 --- /dev/null +++ b/tests/test_classes/test_config_stores.py @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: MIT +import pytest + +from augur.application.config import JsonConfig, DatabaseConfig, NotWriteableException + + +def test_jsonconfig_readonly_flags(): + cfg = JsonConfig({"A": {"x": 1}}) + assert cfg.writable is False + assert cfg.empty is False + + +def test_jsonconfig_empty_true_false(): + assert JsonConfig({}).empty is True + assert JsonConfig({"A": {}}).empty is False + + +def test_jsonconfig_retrieve_has_get(): + data = {"Alpha": {"a": 1, "b": "str"}, "Beta": {}} + cfg = JsonConfig(data) + + # retrieve full dict + assert cfg.retrieve_dict() is data + + # has/get section + assert cfg.has_section("Alpha") is True + assert cfg.has_section("Missing") is False + assert cfg.get_section("Alpha") == {"a": 1, "b": "str"} + assert cfg.get_section("Missing") is None + + # has/get value + assert cfg.has_value("Alpha", "a") is True + assert cfg.has_value("Alpha", "missing") is False + assert cfg.has_value("Missing", "a") is False + assert cfg.get_value("Alpha", "a") == 1 + assert cfg.get_value("Alpha", "missing") is None + assert cfg.get_value("Missing", "a") is None + + +@pytest.mark.parametrize( + "callable_name, args, kwargs", + [ + ("load_dict", ({"X": {"y": 2}},), {"ignore_existing": False}), + ("clear", tuple(), {}), + ("remove_section", ("X",), {}), + ("create_section", ("X", {"y": 2}), {"ignore_existing": False}), + ("remove_value", ("X", "y"), {}), + ("add_value", ("X", "y", 2), {"ignore_existing": False}), + ], +) +def test_jsonconfig_mutations_raise_not_writable(callable_name, args, kwargs): + cfg = JsonConfig({"A": {"x": 1}}) + with pytest.raises(NotWriteableException): + getattr(cfg, callable_name)(*args, **kwargs) From 8118447524a31a702ecf9e3f1f1a53482ec313da Mon Sep 17 
00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 13:28:22 -0500 Subject: [PATCH 077/105] set up tox to run the new tests Signed-off-by: Adrian Edwards --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bdc7f87fde..ddaed4301d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -162,7 +162,7 @@ constraint-dependencies = [ [tool.tox] legacy_tox_ini = """ [tox] - envlist = py{,311}-{metric-routes,application, workers} + envlist = py{,311}-{classes} skip_missing_interpreters = true requires = tox-uv @@ -179,6 +179,7 @@ legacy_tox_ini = """ application: pytest tests --ignore=tests/test_routes --ignore=tests/test_workers metric-routes: python tests/test_routes/runner.py workers: pytest tests/test_workers/ + classes: pytest tests/test_classes/ worker-persistance: pytest test/test_workers/worker_persistance/ [pytest] From 8e0c979851fde595226a458a184119d51fb94a23 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 15:52:32 -0500 Subject: [PATCH 078/105] set up loggers in the ConfigStore classes Signed-off-by: Adrian Edwards --- augur/application/config.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index b22d3cc252..0d2f1134fd 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -6,6 +6,7 @@ from augur.application.db.models import Config from augur.application.db.util import execute_session_query, convert_type_of_value from pathlib import Path +import logging def get_development_flag_from_config(): @@ -380,6 +381,9 @@ class ConfigStore(): This should not contain implementations unless they apply to all possible config backends """ + def __init__(self, logger: logging.Logger): + self.logger = logger + @property def writable(self): """Determine if this config store is writable. 
@@ -529,7 +533,8 @@ class JsonConfig(ConfigStore): """A ConfigStore for handling JSON data """ - def __init__(self, json_data): + def __init__(self, json_data, logger: logging.Logger): + super().__init__(logger) self.json_data = json_data @property From 755642ab9d00e83e06159163fc8c08111d56de10 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 16:04:23 -0500 Subject: [PATCH 079/105] Add DatabaseConfig implementation of ConfigStore to handle config stored in the database Signed-off-by: Adrian Edwards --- augur/application/config.py | 172 +++++++++++++++++++++++ tests/test_classes/test_config_stores.py | 41 ++++++ 2 files changed, 213 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index 0d2f1134fd..3effca61c7 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -619,3 +619,175 @@ def get_value(self, section_name: str, value_key: str): return None return self.json_data[section_name].get(value_key, None) + + + +class DatabaseConfig(ConfigStore): + """A ConfigStore for handling JSON data + """ + from augur.application.db.session import DatabaseSession + + def __init__(self, session: DatabaseSession, logger: logging.Logger): + super().__init__(logger) + self.session = session + + @property + def writable(self): + return True + + @property + def empty(self): + query = self.session.query(Config) + return execute_session_query(query, 'first') is None + + @staticmethod + def _dict_to_config_table(json_data:dict): + """Convert an augur settings dict into a mapping from table columns to values for insertion in bulk + + Args: + json_data (dict): The settings to convert, in the same format as the default_dict at the top of this file + """ + + config_values = [] + for section_name, settings in json_data.items(): + for key, value in settings.items(): + + if isinstance(value, dict) is True: + # TODO: Uncomment out when insights worker config stuff is resolved + # self.logger.error(f"Values cannot be of 
type dict: {value}") + return + + setting = { + "section_name": section_name, + "setting_name": key, + "value": value, + } + + if "type" not in setting: + setting["type"] = setting["value"].__class__.__name__ + + if setting["type"] == "NoneType": + setting["type"] = None + + config_values.append(setting) + + return config_values + + + def load_dict(cls, data: dict, ignore_existing=False): + if not self.writable: + raise NotWriteableException() + + for section, config_values in data.items(): + self.create_section(section, values, ignore_existing=ignore_existing) + + def retrieve_dict(self): + # get all the sections in the config table + query = self.session.query(Config.section_name).order_by(Config.section_name.asc()) + section_names = execute_session_query(query, 'all') + + config = {} + # loop through and get the data for each section + for section_name in section_names: + + section_data = self.get_section(section_name[0]) + + # rows with a section of None are on the top level, + # so we are adding these values to the top level rather + # than creating a section for them + if section_name[0] is None: + for key in list(section_data.keys()): + config[key] = section_data[key] + continue + + # add section data to config object + config[section_name[0]] = section_data + + return config + + def clear(self): + if not self.writable: + raise NotWriteableException() + + self.session.query(Config).delete() + self.session.commit() + + def remove_section(self, section_name: str) -> None: + if not self.writable: + raise NotWriteableException() + + self.session.query(Config).filter(Config.section_name == section_name).delete() + self.session.commit() + + + def has_section(self, section_name: str) -> bool: + query = self.session.query(Config).filter(Config.section_name == section_name) + return execute_session_query(query, 'first') is not None + + def create_section(self, section_name: str, values: Optional[dict] = None, ignore_existing=False) -> None: + if not self.writable: + 
raise NotWriteableException() + + if values is None: + values = {} + + for key, value in values.items(): + self.add_value(section_name, key, value, ignore_existing=ignore_existing) + + def get_section(self, section_name: str) -> dict: + query = self.session.query(Config).filter_by(section_name=section_name).order_by(Config.setting_name.asc()) + section_data = execute_session_query(query, 'all') + + section_dict = {} + for setting in section_data: + setting_dict = setting.__dict__ + + setting_dict = convert_type_of_value(setting_dict, self.logger) + + setting_name = setting_dict["setting_name"] + setting_value = setting_dict["value"] + + section_dict[setting_name] = setting_value + + return section_dict + + def remove_value(self, section_name: str, value_key: str) -> None: + raise NotImplementedError() + + def has_value(self, section_name: str, value_key: str) -> bool: + query = self.session.query(Config).filter(and_(Config.section_name == section_name,Config.setting_name == value_key) ) + return execute_session_query(query, 'first') is not None + + def add_value(self, section_name: str, value_key: str, value, ignore_existing=False) -> None: + + setting = self._dict_to_config_table({[section_name]: { [value_key]: value}}) + + if not self.has_value(section_name, value_key): + self.session.insert_data(setting,Config, ["section_name", "setting_name"]) + else: + if not ignore_existing: + self.logger.error(f"Could not insert config value '{value if section_name is not "Keys" else "REDACTED"}' into section '{section_name}' for key '{value_key}' database because a value already exists there and caller did not specify override") + return + #If setting exists. 
use raw update to not increase autoincrement + update_query = ( + update(Config) + .where(Config.section_name == setting["section_name"]) + .where(Config.setting_name == setting["setting_name"]) + .values(value=setting["value"]) + ) + + self.session.execute(update_query) + self.session.commit() + + def get_value(self, section_name: str, value_key: str): + try: + query = self.session.query(Config).filter(Config.section_name == section_name, Config.setting_name == value_key) + config_setting = execute_session_query(query, 'one') + except s.orm.exc.NoResultFound: + return None + + setting_dict = config_setting.__dict__ + + setting_dict = convert_type_of_value(setting_dict, self.logger) + + return setting_dict["value"] diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index 7789a12b84..ee77874508 100644 --- a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -52,3 +52,44 @@ def test_jsonconfig_mutations_raise_not_writable(callable_name, args, kwargs): cfg = JsonConfig({"A": {"x": 1}}) with pytest.raises(NotWriteableException): getattr(cfg, callable_name)(*args, **kwargs) + + +def test_dict_to_config_table_happy_path(): + input_dict = { + "Section1": {"alpha": 1, "beta": "x"}, + "Section2": {"gamma": False, "delta": 3.14}, + } + + rows = DatabaseConfig._dict_to_config_table(input_dict) + + # Expect a list of row dicts with section_name, setting_name, value + assert isinstance(rows, list) + expected = [ + { + "section_name": "Section1", + "setting_name": "alpha", + "value": 1, + "type": "int" + }, + { + "section_name": "Section1", + "setting_name": "beta", + "value": "x", + "type": "str" + }, + { + "section_name": "Section2", + "setting_name": "gamma", + "value": False, + "type": "bool" + }, + { + "section_name": "Section2", + "setting_name": "delta", + "value": 3.14, + "type": "float" + }, + ] + assert rows == expected + + From 829cda4493e5e8fc82ed828f8a67aef46335dcb3 Mon Sep 17 
00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 15:44:28 -0500 Subject: [PATCH 080/105] set up config hierarchy Signed-off-by: Adrian Edwards --- augur/application/config.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 3effca61c7..12820e8c0c 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -130,12 +130,18 @@ def __init__(self, logger, session: DatabaseSession): self.logger = logger self.accepted_types = ["str", "bool", "int", "float", "NoneType"] + + # list items in order of precedence. lowest precedence (i.e. fallback) values first + self.config_sources = [ + JsonConfig(default_config, logger) + ] + config_dir = Path(os.getenv("CONFIG_DATADIR", "./")) config_path = config_dir.joinpath("augur.json") if config_path.exists(): - self.default_config = json.loads(config_path.read_text(encoding="UTF-8")) - else: - self.default_config = default_config + self.config_sources.append(JsonConfig(json.loads(config_path.read_text(encoding="UTF-8")), logger)) + + self.config_sources.append( DatabaseConfig(session, logger) ) def get_section(self, section_name) -> dict: """Get a section of data from the config. 
From 5d57910cb35885db26dcf7e5bf667e988b789ca6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 16:13:14 -0500 Subject: [PATCH 081/105] determine emptiness based on all sources Signed-off-by: Adrian Edwards --- augur/application/config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 12820e8c0c..b2cf4c8340 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -233,8 +233,7 @@ def empty(self) -> bool: Returns: True if the config is empty, and False if it is not """ - query = self.session.query(Config) - return execute_session_query(query, 'first') is None + return all(map(lambda s: s.empty), self.config_sources) def is_section_in_config(self, section_name: str) -> bool: """Determine if a section is in the config. From 4f0482692003d6c962da950a1374faeb9cee2fba Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 16:13:32 -0500 Subject: [PATCH 082/105] get dict form of config based on all sources Signed-off-by: Adrian Edwards --- augur/application/config.py | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index b2cf4c8340..c387e819f6 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -201,29 +201,13 @@ def load_config(self) -> dict: """Get full config as a dictionary. 
Returns: - The config from the database + The config from all sources """ - # get all the sections in the config table - query = self.session.query(Config.section_name).order_by(Config.section_name.asc()) - section_names = execute_session_query(query, 'all') - config = {} - # loop through and get the data for each section - for section_name in section_names: - - section_data = self.get_section(section_name[0]) - - # rows with a section of None are on the top level, - # so we are adding these values to the top level rather - # than creating a section for them - if section_name[0] is None: - for key in list(section_data.keys()): - config[key] = section_data[key] - continue - - # add section data to config object - config[section_name[0]] = section_data + for config_source in self.config_sources: + config.update(config_source.retrieve_dict()) + return config From ea01040404d59a090ff13c90a09f4d89f2d6f23d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 16:15:42 -0500 Subject: [PATCH 083/105] determine if section is present in AugurConfig based on all sources Signed-off-by: Adrian Edwards --- augur/application/config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index c387e819f6..7a2d218c20 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -228,8 +228,7 @@ def is_section_in_config(self, section_name: str) -> bool: Returns: True if section is in the config, and False if it is not """ - query = self.session.query(Config).filter(Config.section_name == section_name) - return execute_session_query(query, 'first') is not None + return any(map(lambda s: s.has_section(section_name)), self.config_sources) def add_or_update_settings(self, settings: List[dict]): From c60fcab348ef0fed4523f6633a95c6d8d71af748 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 16:32:54 -0500 Subject: [PATCH 084/105] handle simpler cases where the writeable 
source needs to be updated Signed-off-by: Adrian Edwards --- augur/application/config.py | 79 +++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 7a2d218c20..48a6d1db09 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -143,6 +143,22 @@ def __init__(self, logger, session: DatabaseSession): self.config_sources.append( DatabaseConfig(session, logger) ) + def _get_writable_source(self) -> ConfigStore: + """Returns the highest precedence source that can be written to. + Intended to be used for operations that require changing the config updates. + + Raises: + NotWriteableException: If no sources are available for writing, this exception is raised to tell the caller they must proceed in a read only manner + + Returns: + ConfigStore: An instance of ConfigStore representing the config storage location that can be written to. + """ + writeable_sources = list(filter(lambda s: source.writable), self.config_sources) + if len(writeable_sources) < 1: + raise NotWriteableException + + return writeable_sources[-1] + def get_section(self, section_name) -> dict: """Get a section of data from the config. 
@@ -152,22 +168,11 @@ def get_section(self, section_name) -> dict: Returns: The section data as a dict """ - query = self.session.query(Config).filter_by(section_name=section_name).order_by(Config.setting_name.asc()) - section_data = execute_session_query(query, 'all') + if not self.is_section_in_config(section_name): + return {} - section_dict = {} - for setting in section_data: - setting_dict = setting.__dict__ - - setting_dict = convert_type_of_value(setting_dict, self.logger) - - setting_name = setting_dict["setting_name"] - setting_value = setting_dict["value"] - - section_dict[setting_name] = setting_value - - return section_dict - + config_dict = self.load_config() + return config_dict[section_name] def get_value(self, section_name: str, setting_name: str) -> Optional[Any]: """Get the value of a setting from the config. @@ -180,22 +185,15 @@ def get_value(self, section_name: str, setting_name: str) -> Optional[Any]: The value from config if found, and None otherwise """ - # TODO temporary until added to the DB schema - if section_name == "frontend" and setting_name == "pagination_offset": - return 25 - - try: - query = self.session.query(Config).filter(Config.section_name == section_name, Config.setting_name == setting_name) - config_setting = execute_session_query(query, 'one') - except s.orm.exc.NoResultFound: - return None - - setting_dict = config_setting.__dict__ - - setting_dict = convert_type_of_value(setting_dict, self.logger) - - return setting_dict["value"] + # TODO temporary until all uses of the lowercase version are gone + if section_name == "frontend": + section_name = "Frontend" + for source in self.config_sources.reverse(): + val = source.get_value(section_name, setting_name) + if val is not None: + return val + return None def load_config(self) -> dict: """Get full config as a dictionary. 
@@ -341,8 +339,14 @@ def load_config_from_dict(self, dict_data: dict) -> None: def clear(self) -> None: """Remove all values from the config.""" - self.session.query(Config).delete() - self.session.commit() + # note, with the hierarchical nature of the new config setup, this is a pretty useless method + # this is because the hierarchical store is designed to always be able to fall back on preconfigured defaults. + # Clearing will only reset any changes that the writable source provided to the config. + try: + writeable_config = self._get_writable_source() + writeable_config.clear() + except NotWriteableException: + return def remove_section(self, section_name: str) -> None: """Remove a section from the config. @@ -350,9 +354,14 @@ def remove_section(self, section_name: str) -> None: Args: section_name: The name of the section being deleted """ - self.session.query(Config).filter(Config.section_name == section_name).delete() - self.session.commit() - + # note, with the hierarchical nature of the new config setup, this is a pretty useless method + # this is because the hierarchical store is designed to always be able to fall back on preconfigured defaults. + # Removing a section will only reset any changes that the writable source contributed in that section. 
+ try: + writeable_config = self._get_writable_source() + writeable_config.remove_section(section_name) + except NotWriteableException: + return def create_default_config(self) -> None: """Create default config in the database.""" From ac479c02395d7890e8fd94fd4b1e5441d5e11ca9 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 5 Nov 2025 16:39:00 -0500 Subject: [PATCH 085/105] update value redaction for API key values Signed-off-by: Adrian Edwards --- augur/application/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index 48a6d1db09..a7bce5fba1 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -763,7 +763,8 @@ def add_value(self, section_name: str, value_key: str, value, ignore_existing=Fa self.session.insert_data(setting,Config, ["section_name", "setting_name"]) else: if not ignore_existing: - self.logger.error(f"Could not insert config value '{value if section_name is not "Keys" else "REDACTED"}' into section '{section_name}' for key '{value_key}' database because a value already exists there and caller did not specify override") + value_redacted = value if section_name is not "Keys" else "REDACTED" + self.logger.error(f"Could not insert config value '{value_redacted}' into section '{section_name}' for key '{value_key}' database because a value already exists there and caller did not specify override") return #If setting exists. 
use raw update to not increase autoincrement update_query = ( From e2056e2c5aa6f8beea0403411db1aba431d63fc9 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 10:07:15 -0500 Subject: [PATCH 086/105] testing fixes Signed-off-by: Adrian Edwards --- augur/application/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index a7bce5fba1..a1994e03fa 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -143,7 +143,7 @@ def __init__(self, logger, session: DatabaseSession): self.config_sources.append( DatabaseConfig(session, logger) ) - def _get_writable_source(self) -> ConfigStore: + def _get_writable_source(self) -> 'ConfigStore': """Returns the highest precedence source that can be written to. Intended to be used for operations that require changing the config updates. @@ -153,7 +153,7 @@ def _get_writable_source(self) -> ConfigStore: Returns: ConfigStore: An instance of ConfigStore representing the config storage location that can be written to. 
""" - writeable_sources = list(filter(lambda s: source.writable), self.config_sources) + writeable_sources = list(filter(lambda s: source.writable, self.config_sources)) if len(writeable_sources) < 1: raise NotWriteableException From 775407724b32cc31cf2778e93584ddd69c641fb7 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 10:09:13 -0500 Subject: [PATCH 087/105] implement add_section_from_json using new config classes Signed-off-by: Adrian Edwards --- augur/application/config.py | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index a1994e03fa..10c02e04eb 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -281,26 +281,11 @@ def add_section_from_json(self, section_name: str, json_data: dict) -> None: section_name: The name of the section being added json_data: The data being added """ - data_keys = list(json_data.keys()) - - settings = [] - for key in data_keys: - - value = json_data[key] - - if isinstance(value, dict) is True: - # TODO: Uncomment out when insights worker config stuff is resolved - # self.logger.error(f"Values cannot be of type dict: {value}") - return - - setting = { - "section_name": section_name, - "setting_name": key, - "value": json_data[key], - } - settings.append(setting) - - self.add_or_update_settings(settings) + try: + writeable_config = self._get_writable_source() + writeable_config.add_section_from_json(section_name, json_data) + except NotWriteableException: + return def load_config_file(self, file_path: str) -> dict: From 10b38d536fd28e94a788bf78249f0790f73d3074 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 10:22:46 -0500 Subject: [PATCH 088/105] factor out a helper for filtering the config sources Signed-off-by: Adrian Edwards --- augur/application/config.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/augur/application/config.py 
b/augur/application/config.py index 10c02e04eb..cb2b4b2d5f 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -153,11 +153,21 @@ def _get_writable_source(self) -> 'ConfigStore': Returns: ConfigStore: An instance of ConfigStore representing the config storage location that can be written to. """ - writeable_sources = list(filter(lambda s: source.writable, self.config_sources)) + writeable_sources = self._fetch_config_stores(lambda source: source.writable) if len(writeable_sources) < 1: raise NotWriteableException return writeable_sources[-1] + + def _fetch_config_stores(self, filter_func=None): + """Fetch the stack of config stores filtered by the provided function + + Args: + filter_func (func): a function or lambda accepting a ConfigSource as its only argument and returning a boolean indicating if it should be kept in or left out by the filter + """ + if filter_func is None: + return self.config_sources + return list(filter(filter_func, self.config_sources)) def get_section(self, section_name) -> dict: """Get a section of data from the config. 
From 23a25df79dd23c41f348781aef601261b455c448 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 10:25:17 -0500 Subject: [PATCH 089/105] add a base-config property to return the config as assembled by all read-only components of the config Signed-off-by: Adrian Edwards --- augur/application/config.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index cb2b4b2d5f..26eb38fc41 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -124,6 +124,20 @@ class AugurConfig(): session: DatabaseSession + @property + def base_config(self): + """Return the "base" config - either the default config or a default config with user modifications on top + This is used as a base upon which the Augur CLI injects values, such as API keys, connection strings, + and other values passed in via environment variables. + This config is then modified and passed into `load_config_from_dict`. + """ + read_only_sources = self._fetch_config_stores(lambda source: not source.writable) + config = {} + for config_source in read_only_sources: + config.update(config_source.retrieve_dict()) + + return config + def __init__(self, logger, session: DatabaseSession): self.session = session From 16ea9fa9b09fc9f0de76eb6b0f3575657d4e324b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 10:26:59 -0500 Subject: [PATCH 090/105] adjust config init process to use the new base_config Signed-off-by: Adrian Edwards --- augur/application/cli/config.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/augur/application/cli/config.py b/augur/application/cli/config.py index 7156c0561b..372e845db8 100644 --- a/augur/application/cli/config.py +++ b/augur/application/cli/config.py @@ -68,15 +68,15 @@ def init_config(ctx, github_api_key, facade_repo_directory, gitlab_api_key, redi config = AugurConfig(logger, session) - default_config = config.default_config 
+ augmented_config = config.base_config phase_names = get_phase_names_without_import() #Add all phases as enabled by default for name in phase_names: - if name not in default_config['Task_Routine']: - default_config['Task_Routine'].update({name : 1}) + if name not in augmented_config['Task_Routine']: + augmented_config['Task_Routine'].update({name : 1}) #print(default_config) if redis_conn_string: @@ -91,18 +91,18 @@ def init_config(ctx, github_api_key, facade_repo_directory, gitlab_api_key, redi except ValueError: pass - default_config["Redis"]["connection_string"] = redis_conn_string + augmented_config["Redis"]["connection_string"] = redis_conn_string if rabbitmq_conn_string: - default_config["RabbitMQ"]["connection_string"] = rabbitmq_conn_string + augmented_config["RabbitMQ"]["connection_string"] = rabbitmq_conn_string - default_config["Keys"] = keys + augmented_config["Keys"] = keys - default_config["Facade"]["repo_directory"] = facade_repo_directory + augmented_config["Facade"]["repo_directory"] = facade_repo_directory - default_config["Logging"]["logs_directory"] = logs_directory or (ROOT_AUGUR_DIRECTORY + "/logs/") + augmented_config["Logging"]["logs_directory"] = logs_directory or (ROOT_AUGUR_DIRECTORY + "/logs/") - config.load_config_from_dict(default_config) + config.load_config_from_dict(augmented_config) @cli.command('load') From 3960551b2b8cbbaba9173ef3e6326720500a05ce Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 10:27:08 -0500 Subject: [PATCH 091/105] unused function Signed-off-by: Adrian Edwards --- augur/application/config.py | 5 --- .../test_config/test_config.py | 33 ------------------- 2 files changed, 38 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 26eb38fc41..5384825b89 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -372,11 +372,6 @@ def remove_section(self, section_name: str) -> None: except NotWriteableException: return - def 
create_default_config(self) -> None: - """Create default config in the database.""" - self.load_config_from_dict(self.default_config) - - class NotWriteableException(Exception): """Custom Augur exception class to be used when trying to modify a config that is not writeable """ diff --git a/tests/test_applicaton/test_config/test_config.py b/tests/test_applicaton/test_config/test_config.py index b6b69f8914..e7a533b5d4 100644 --- a/tests/test_applicaton/test_config/test_config.py +++ b/tests/test_applicaton/test_config/test_config.py @@ -404,39 +404,6 @@ def test_remove_section(test_db_config, test_db_engine): -def test_create_default_config(test_db_config, test_db_engine): - - from augur.application.config import default_config - - test_db_config.create_default_config() - - config_sections = list(default_config.keys()) - - try: - - with test_db_engine.connect() as connection: - - result = connection.execute("""SELECT * FROM augur_operations.config""").fetchall() - - assert result is not None - assert len(result) > 0 - - result_sections = [] - for row in result: - dict_data = dict(row) - - if dict_data["section_name"] not in result_sections: - result_sections.append(dict_data["section_name"]) - - assert dict_data["section_name"] and dict_data["setting_name"] - - assert len(config_sections) == len(result_sections) - - finally: - with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") - - From bb7ae02e5d0cf90ea65f384b571d49d32e95bf90 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 11:25:44 -0500 Subject: [PATCH 092/105] pass in a mock logger as part of the JsonConfig tests Generated-by: Gpt-5 via cursor Signed-off-by: Adrian Edwards --- tests/test_classes/test_config_stores.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index ee77874508..5759985e87 100644 --- 
a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -1,23 +1,29 @@ # SPDX-License-Identifier: MIT import pytest +from unittest.mock import Mock from augur.application.config import JsonConfig, DatabaseConfig, NotWriteableException -def test_jsonconfig_readonly_flags(): - cfg = JsonConfig({"A": {"x": 1}}) +@pytest.fixture +def mock_logger(): + return Mock() + + +def test_jsonconfig_readonly_flags(mock_logger): + cfg = JsonConfig({"A": {"x": 1}}, mock_logger) assert cfg.writable is False assert cfg.empty is False -def test_jsonconfig_empty_true_false(): - assert JsonConfig({}).empty is True - assert JsonConfig({"A": {}}).empty is False +def test_jsonconfig_empty_true_false(mock_logger): + assert JsonConfig({}, mock_logger).empty is True + assert JsonConfig({"A": {}}, mock_logger).empty is False -def test_jsonconfig_retrieve_has_get(): +def test_jsonconfig_retrieve_has_get(mock_logger): data = {"Alpha": {"a": 1, "b": "str"}, "Beta": {}} - cfg = JsonConfig(data) + cfg = JsonConfig(data, mock_logger) # retrieve full dict assert cfg.retrieve_dict() is data @@ -48,8 +54,8 @@ def test_jsonconfig_retrieve_has_get(): ("add_value", ("X", "y", 2), {"ignore_existing": False}), ], ) -def test_jsonconfig_mutations_raise_not_writable(callable_name, args, kwargs): - cfg = JsonConfig({"A": {"x": 1}}) +def test_jsonconfig_mutations_raise_not_writable(mock_logger, callable_name, args, kwargs): + cfg = JsonConfig({"A": {"x": 1}}, mock_logger) with pytest.raises(NotWriteableException): getattr(cfg, callable_name)(*args, **kwargs) From 220aa0f483a570a8b55045aaf27251e8ed72e9cd Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 13:37:05 -0500 Subject: [PATCH 093/105] don't use an in-place reverse - its going to cause all sorts of issues Signed-off-by: Adrian Edwards --- augur/application/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index 
5384825b89..2c0761c499 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -213,7 +213,7 @@ def get_value(self, section_name: str, setting_name: str) -> Optional[Any]: if section_name == "frontend": section_name = "Frontend" - for source in self.config_sources.reverse(): + for source in reversed(self.config_sources): val = source.get_value(section_name, setting_name) if val is not None: return val From 3884fa8816fe47bc8b433fbb3cf93c8b9139d8ef Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 13:37:11 -0500 Subject: [PATCH 094/105] use a function that exists Signed-off-by: Adrian Edwards --- augur/application/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index 2c0761c499..5a10e52fd3 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -307,7 +307,7 @@ def add_section_from_json(self, section_name: str, json_data: dict) -> None: """ try: writeable_config = self._get_writable_source() - writeable_config.add_section_from_json(section_name, json_data) + writeable_config.create_section(section_name, json_data, ignore_existing=True) except NotWriteableException: return From 30e8e82628fc7d84467c9cb8a66cde40a35c139d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 13:37:34 -0500 Subject: [PATCH 095/105] add a test case for fetching real values Signed-off-by: Adrian Edwards --- tests/test_classes/test_config_stores.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index 5759985e87..69fe19017f 100644 --- a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -2,13 +2,17 @@ import pytest from unittest.mock import Mock -from augur.application.config import JsonConfig, DatabaseConfig, NotWriteableException +from augur.application.config import JsonConfig, 
DatabaseConfig, NotWriteableException, AugurConfig, default_config @pytest.fixture def mock_logger(): return Mock() +@pytest.fixture +def mock_session(): + return Mock() + def test_jsonconfig_readonly_flags(mock_logger): cfg = JsonConfig({"A": {"x": 1}}, mock_logger) @@ -99,3 +103,9 @@ def test_dict_to_config_table_happy_path(): assert rows == expected + +def test_fetching_real_defaults(mock_logger, mock_session): + cfg = AugurConfig(mock_logger, mock_session) + cfg.config_sources = [JsonConfig(default_config, mock_logger)] + + assert cfg.get_value("Redis", "cache_group") == 0 From 389788bfb7c610fb4a46de753afb21d236f532b6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 15:53:54 -0400 Subject: [PATCH 096/105] refactor start_celery_worker_processes to accept raw worker counts for each thing This is a tuple so it can potentially scale up without impacting the method signature Signed-off-by: Adrian Edwards --- augur/application/cli/backend.py | 41 +++++++++++++------------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index 28ed289033..8ab17e15b0 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -201,25 +201,26 @@ def start(ctx, disable_collection, development, pidfile, port): os.unlink(pidfile) -def start_celery_worker_processes(vmem_cap_ratio, disable_collection=False): +def start_celery_worker_processes(worker_counts: tuple[int, int, int], disable_collection=False): + """ + Args: + worker_counts (tuple): a tuple of three integers describing how many workers to use for core, secondary, and facade tasks + disable_collection (bool, optional): whether to disable collection entirely and not schedule any actual task workers. Defaults to False. + + Returns: + list: a list of the worker processes as executed by subprocess.Popen + """ #Calculate process scaling based on how much memory is available on the system in bytes. 
#Each celery process takes ~500MB or 500 * 1024^2 bytes process_list = [] - #Cap memory usage to 30% of total virtual memory - available_memory_in_bytes = psutil.virtual_memory().total * vmem_cap_ratio - available_memory_in_megabytes = available_memory_in_bytes / (1024 ** 2) - max_process_estimate = available_memory_in_megabytes // 500 - sleep_time = 0 + core_worker_count, secondary_worker_count, facade_worker_count = worker_counts - #Get a subset of the maximum procesess available using a ratio, not exceeding a maximum value - def determine_worker_processes(ratio,maximum): - return max(min(round(max_process_estimate * ratio),maximum),1) + sleep_time = 0 frontend_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=1 -n frontend:{uuid.uuid4().hex}@%h -Q frontend" - max_process_estimate -= 1 process_list.append(subprocess.Popen(frontend_worker.split(" "))) sleep_time += 6 @@ -227,28 +228,20 @@ def determine_worker_processes(ratio,maximum): #2 processes are always reserved as a baseline. 
scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=2 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling" - max_process_estimate -= 2 process_list.append(subprocess.Popen(scheduling_worker.split(" "))) sleep_time += 6 - - #60% of estimate, Maximum value of 45 : Reduced because it can be lower - core_num_processes = determine_worker_processes(.40, 90) - logger.info(f"Starting core worker processes with concurrency={core_num_processes}") - core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h" + logger.info(f"Starting core worker processes with concurrency={core_worker_count}") + core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_worker_count} -n core:{uuid.uuid4().hex}@%h" process_list.append(subprocess.Popen(core_worker.split(" "))) sleep_time += 6 - #20% of estimate, Maximum value of 25 - secondary_num_processes = determine_worker_processes(.39, 50) - logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}") - secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" + logger.info(f"Starting secondary worker processes with concurrency={secondary_worker_count}") + secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_worker_count} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" process_list.append(subprocess.Popen(secondary_worker.split(" "))) sleep_time += 6 - #15% of estimate, Maximum value of 20 - facade_num_processes = determine_worker_processes(.17, 20) - logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}") - facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n 
facade:{uuid.uuid4().hex}@%h -Q facade" + logger.info(f"Starting facade worker processes with concurrency={facade_worker_count}") + facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_worker_count} -n facade:{uuid.uuid4().hex}@%h -Q facade" process_list.append(subprocess.Popen(facade_worker.split(" "))) sleep_time += 6 From 1bd2cba3f79331b34577e299099841be4cc8ee87 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 16:05:59 -0400 Subject: [PATCH 097/105] apply same to start_celery_collection_processes Signed-off-by: Adrian Edwards --- augur/application/cli/collection.py | 37 +++++++++++------------------ 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/augur/application/cli/collection.py b/augur/application/cli/collection.py index f18ff03e29..0c9259d33a 100644 --- a/augur/application/cli/collection.py +++ b/augur/application/cli/collection.py @@ -127,47 +127,38 @@ def start(ctx, development): except RedisConnectionError: pass -def start_celery_collection_processes(vmem_cap_ratio): +def start_celery_collection_processes(worker_counts: tuple[int, int, int]): + """ + Args: + worker_counts (tuple): a tuple of three integers describing how many workers to use for core, secondary, and facade tasks - #Calculate process scaling based on how much memory is available on the system in bytes. 
- #Each celery process takes ~500MB or 500 * 1024^2 bytes + Returns: + list: a list of the collection processes as executed by subprocess.Popen + """ process_list = [] - #Cap memory usage to 30% of total virtual memory - available_memory_in_bytes = psutil.virtual_memory().total * vmem_cap_ratio - available_memory_in_megabytes = available_memory_in_bytes / (1024 ** 2) - max_process_estimate = available_memory_in_megabytes // 500 sleep_time = 0 - #Get a subset of the maximum processes available using a ratio, not exceeding a maximum value - def determine_worker_processes(ratio,maximum): - return max(min(round(max_process_estimate * ratio),maximum),1) + core_worker_count, secondary_worker_count, facade_worker_count = worker_counts #2 processes are always reserved as a baseline. scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=2 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling" - max_process_estimate -= 2 process_list.append(subprocess.Popen(scheduling_worker.split(" "))) sleep_time += 6 - #60% of estimate, Maximum value of 45: Reduced because not needed - core_num_processes = determine_worker_processes(.40, 90) - logger.info(f"Starting core worker processes with concurrency={core_num_processes}") - core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h" + logger.info(f"Starting core collection processes with concurrency={core_worker_count}") + core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_worker_count} -n core:{uuid.uuid4().hex}@%h" process_list.append(subprocess.Popen(core_worker.split(" "))) sleep_time += 6 - #20% of estimate, Maximum value of 25 - secondary_num_processes = determine_worker_processes(.39, 50) - logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}") - secondary_worker = f"celery -A 
augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" + logger.info(f"Starting secondary collection processes with concurrency={secondary_worker_count}") + secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_worker_count} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" process_list.append(subprocess.Popen(secondary_worker.split(" "))) sleep_time += 6 - #15% of estimate, Maximum value of 20 - facade_num_processes = determine_worker_processes(.17, 20) - logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}") - facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade" + logger.info(f"Starting facade collection processes with concurrency={facade_worker_count}") + facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_worker_count} -n facade:{uuid.uuid4().hex}@%h -Q facade" process_list.append(subprocess.Popen(facade_worker.split(" "))) sleep_time += 6 From 267b903a0dba01670c32ce0cfe9b89df0a5d27eb Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 16:11:53 -0400 Subject: [PATCH 098/105] fetch new values from config Signed-off-by: Adrian Edwards --- augur/application/cli/backend.py | 7 +++++-- augur/application/cli/collection.py | 6 ++++-- augur/application/config.py | 4 +++- docs/source/getting-started/collecting-data.rst | 7 +++++-- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index 8ab17e15b0..341df88862 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -87,7 +87,10 @@ def start(ctx, disable_collection, development, pidfile, port): if disable_collection: os.environ["AUGUR_DISABLE_COLLECTION"] = "1" - 
worker_vmem_cap = get_value("Celery", 'worker_process_vmem_cap') + core_worker_count = get_value("Celery", 'core_worker_count') + secondary_worker_count = get_value("Celery", 'secondary_worker_count') + facade_worker_count = get_value("Celery", 'facade_worker_count') + # create rabbit messages so if it failed on shutdown the queues are clean cleanup_collection_status_and_rabbit(logger, ctx.obj.engine) @@ -119,7 +122,7 @@ def start(ctx, disable_collection, development, pidfile, port): logger.info(f'Augur is running at: {"http" if development else "https"}://{host}:{port}') logger.info(f"The API is available at '{api_response.json()['route']}'") - processes = start_celery_worker_processes(float(worker_vmem_cap), disable_collection) + processes = start_celery_worker_processes((core_worker_count, secondary_worker_count, facade_worker_count), disable_collection) celery_beat_schedule_db = os.getenv("CELERYBEAT_SCHEDULE_DB", "celerybeat-schedule.db") if os.path.exists(celery_beat_schedule_db): diff --git a/augur/application/cli/collection.py b/augur/application/cli/collection.py index 0c9259d33a..810fecf74a 100644 --- a/augur/application/cli/collection.py +++ b/augur/application/cli/collection.py @@ -78,9 +78,11 @@ def start(ctx, development): os.environ["AUGUR_DEV"] = "1" logger.info("Starting in development mode") - worker_vmem_cap = get_value("Celery", 'worker_process_vmem_cap') + core_worker_count = get_value("Celery", 'core_worker_count') + secondary_worker_count = get_value("Celery", 'secondary_worker_count') + facade_worker_count = get_value("Celery", 'facade_worker_count') - process_list = start_celery_collection_processes(float(worker_vmem_cap)) + process_list = start_celery_collection_processes((core_worker_count, secondary_worker_count, facade_worker_count)) if os.path.exists("celerybeat-schedule.db"): logger.info("Deleting old task schedule") diff --git a/augur/application/config.py b/augur/application/config.py index 3fca15b25a..c92d2be8f1 100644 --- 
a/augur/application/config.py +++ b/augur/application/config.py @@ -68,7 +68,9 @@ def get_development_flag(): "log_level": "INFO", }, "Celery": { - "worker_process_vmem_cap": 0.25, + "core_worker_count": 5, + "secondary_worker_count": 5, + "facade_worker_count": 5, "refresh_materialized_views_interval_in_days": 1 }, "Redis": { diff --git a/docs/source/getting-started/collecting-data.rst b/docs/source/getting-started/collecting-data.rst index 91d5f1ad2b..cb21922047 100644 --- a/docs/source/getting-started/collecting-data.rst +++ b/docs/source/getting-started/collecting-data.rst @@ -88,8 +88,11 @@ Celery Configuration **We strongly recommend leaving the default celery blocks generated by the installation process, but if you would like to know more, or fine-tune them to your needs, read on.** -The celery monitor is responsible for generating the tasks that will tell the other worker processes what data to collect, and how. The ``Celery`` block has 2 keys; one for memory cap and one for materialized views interval. -- ``worker_process_vmem_cap``, float between zero and one that determines the maximum percentage of total memory to use for worker processes +The celery monitor is responsible for generating the tasks that will tell the other worker processes what data to collect, and how. The ``Celery`` block has several keys: + +- ``core_worker_count``, the number of workers to spawn to run the core tasks. +- ``secondary_worker_count``, the number of workers to spawn to run the secondary tasks. +- ``facade_worker_count``, the number of workers to spawn to run the facade tasks. - ``refresh_materialized_views_interval_in_days``, number of days to wait between refreshes of materialized views. 
From 05812c23b029fe0e4f328c323032673f34d5c5d1 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 14:20:50 -0500 Subject: [PATCH 099/105] fix unhashable type Signed-off-by: Adrian Edwards --- augur/application/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index 5a10e52fd3..90f11091ff 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -761,7 +761,7 @@ def has_value(self, section_name: str, value_key: str) -> bool: def add_value(self, section_name: str, value_key: str, value, ignore_existing=False) -> None: - setting = self._dict_to_config_table({[section_name]: { [value_key]: value}}) + setting = self._dict_to_config_table({section_name: { value_key: value}}) if not self.has_value(section_name, value_key): self.session.insert_data(setting,Config, ["section_name", "setting_name"]) From 53a2ae34fda67fdf33a69302c7ae31139dfd46b2 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 14:55:48 -0500 Subject: [PATCH 100/105] refactor redaction into a helper Signed-off-by: Adrian Edwards --- augur/application/config.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 90f11091ff..4ecc380628 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -28,7 +28,9 @@ def get_development_flag_from_config(): def get_development_flag(): return os.getenv("AUGUR_DEV") or get_development_flag_from_config() or False - +def redact_setting_value(section_name, setting_name, value): + value_redacted = value if section_name != "Keys" else "REDACTED" + return value_redacted default_config = { "Augur": { @@ -767,8 +769,7 @@ def add_value(self, section_name: str, value_key: str, value, ignore_existing=Fa self.session.insert_data(setting,Config, ["section_name", "setting_name"]) else: if not ignore_existing: - value_redacted = value if section_name 
is not "Keys" else "REDACTED" - self.logger.error(f"Could not insert config value '{value_redacted}' into section '{section_name}' for key '{value_key}' database because a value already exists there and caller did not specify override") + self.logger.error(f"Could not insert config value '{redact_setting_value(section_name, value_key, value)}' into section '{section_name}' for key '{value_key}' database because a value already exists there and caller did not specify override") return #If setting exists. use raw update to not increase autoincrement update_query = ( From 2d4fa30634748f372d90ccf9cab1c4ef1c81d871 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 14:56:22 -0500 Subject: [PATCH 101/105] add_or_update_settings is used externally, replace it with a call to add_value Signed-off-by: Adrian Edwards --- augur/application/config.py | 56 ++++++++----------------------------- 1 file changed, 12 insertions(+), 44 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 4ecc380628..6bacd85506 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -253,52 +253,20 @@ def is_section_in_config(self, section_name: str) -> bool: True if section is in the config, and False if it is not """ return any(map(lambda s: s.has_section(section_name)), self.config_sources) - - - def add_or_update_settings(self, settings: List[dict]): - """Add or update a list of settings. 
+ Args: - list of settings with dicts containing section_name, setting_name, value, and optionally type - - Examples: - type is optional - setting = { - "section_name": section_name, - "setting_name": setting_name, - "value": value, - "type": data_type # optional - } + section_name: The name of the section being added + json_data: The data being added """ - for setting in settings: - - if "type" not in setting: - setting["type"] = setting["value"].__class__.__name__ - - if setting["type"] == "NoneType": - setting["type"] = None - - #print(f"\nsetting: {settings}") - #self.session.insert_data(settings,Config, ["section_name", "setting_name"]) - - #Check if setting exists. - query = self.session.query(Config).filter(and_(Config.section_name == setting["section_name"],Config.setting_name == setting["setting_name"]) ) - - if execute_session_query(query, 'first') is None: - # TODO: Update to use bulk insert dicts so config doesn't require database session - self.session.insert_data(setting,Config, ["section_name", "setting_name"]) - else: - #If setting exists. use raw update to not increase autoincrement - update_query = ( - update(Config) - .where(Config.section_name == setting["section_name"]) - .where(Config.setting_name == setting["setting_name"]) - .values(value=setting["value"]) - ) - - self.session.execute(update_query) - self.session.commit() - + try: + writeable_config = self._get_writable_source() + writeable_config.add_value(section_name, setting_name, value, ignore_existing=True) + except NotWriteableException: + return + def add_section_from_json(self, section_name: str, json_data: dict) -> None: """Add a section from a dict. 
From 9c1f422ca6c6ff97170134e602394ac10fd062ff Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 15:00:24 -0500 Subject: [PATCH 102/105] replace external calls to add_or_update_settings Signed-off-by: Adrian Edwards --- augur/application/cli/config.py | 22 ++----------------- .../test_config/test_config.py | 15 +++++-------- 2 files changed, 7 insertions(+), 30 deletions(-) diff --git a/augur/application/cli/config.py b/augur/application/cli/config.py index 372e845db8..34835bed57 100644 --- a/augur/application/cli/config.py +++ b/augur/application/cli/config.py @@ -162,34 +162,16 @@ def add_section(ctx, section_name, file): @click.option('--section', required=True) @click.option('--setting', required=True) @click.option('--value', required=True) -@click.option('--data-type') @test_connection @test_db_connection @with_database @click.pass_context -def config_set(ctx, section, setting, value, data_type): +def config_set(ctx, section, setting, value): with DatabaseSession(logger, engine=ctx.obj.engine) as session: config = AugurConfig(logger, session) - - if not data_type: - result = session.query(Config).filter(Config.section_name == section, Config.setting_name == setting).all() - if not result: - return click.echo("You must specify a data-type if the setting does not already exist") - data_type = result[0].type - - if data_type not in config.accepted_types: - print(f"Error invalid type for config. 
Please use one of these types: {config.accepted_types}") - return - - setting_dict = { - "section_name": section, - "setting_name": setting, - "value": value, - "type": data_type - } - config.add_or_update_settings([setting_dict]) + config.add_value(section, setting, value) print(f"{setting} in {section} section set to {value}") @cli.command('get') diff --git a/tests/test_applicaton/test_config/test_config.py b/tests/test_applicaton/test_config/test_config.py index e7a533b5d4..2194b122fc 100644 --- a/tests/test_applicaton/test_config/test_config.py +++ b/tests/test_applicaton/test_config/test_config.py @@ -168,11 +168,9 @@ def test_config_is_section_in_config(test_db_config, test_db_engine): def test_config_add_settings(test_db_config, test_db_engine): try: - ip_standard = {"section_name": "Network", "setting_name": "ip_standard", "value": "ipv4"} - subnet_mask = {"section_name": "Network", "setting_name": "subnet_mask", "value": "/24"} - settings = [ip_standard, subnet_mask] - test_db_config.add_or_update_settings(settings) + test_db_config.add_value("Network", "ip_standard", "ipv4") + test_db_config.add_value("Network", "subnet_mask", "/24") with test_db_engine.connect() as connection: @@ -208,12 +206,7 @@ def test_config_update_settings(test_db_config, test_db_engine): new_ip = "1.1.1.1" new_subnet_mask = "/16" - ip_standard_updated["value"] = new_ip_standard - ip_updated["value"] = new_ip - subnet_mask_updated["value"] = new_subnet_mask - all_data = [ip_standard, ip, subnet_mask] - updated_settings = [ip_standard_updated, ip_updated, subnet_mask_updated] with test_db_engine.connect() as connection: @@ -223,7 +216,9 @@ def test_config_update_settings(test_db_config, test_db_engine): connection.execute(query, **data) - test_db_config.add_or_update_settings(updated_settings) + test_db_config.add_value("Network", "ip_standard", new_ip_standard) + test_db_config.add_value("Network", "ip", new_ip) + test_db_config.add_value("Network", "subnet_mask", new_subnet_mask) 
with test_db_engine.connect() as connection: From aa6ce9d0772fb568810c9fe4f49f6c11b38ca70e Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 15:01:18 -0500 Subject: [PATCH 103/105] make use of redaction function Signed-off-by: Adrian Edwards --- augur/application/cli/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/application/cli/config.py b/augur/application/cli/config.py index 34835bed57..6f22ea6c83 100644 --- a/augur/application/cli/config.py +++ b/augur/application/cli/config.py @@ -9,7 +9,7 @@ from augur.application.db.models import Config from augur.application.db.session import DatabaseSession -from augur.application.config import AugurConfig +from augur.application.config import AugurConfig, redact_setting_value from augur.application.cli import DatabaseContext, test_connection, test_db_connection, with_database from augur.util.inspect_without_import import get_phase_names_without_import ROOT_AUGUR_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) @@ -172,7 +172,7 @@ def config_set(ctx, section, setting, value): config = AugurConfig(logger, session) config.add_value(section, setting, value) - print(f"{setting} in {section} section set to {value}") + print(f"{setting} in {section} section set to {redact_setting_value(section, setting, value)}") @cli.command('get') @click.option('--section', required=True) From de98f2520df6d79abe59067f215a0fd0b8019a03 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 6 Nov 2025 15:01:48 -0500 Subject: [PATCH 104/105] fix: extract intended dict value from the returned list Signed-off-by: Adrian Edwards --- augur/application/config.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index 6bacd85506..62531eec50 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -731,7 +731,10 @@ def has_value(self, 
section_name: str, value_key: str) -> bool:
 
 
     def add_value(self, section_name: str, value_key: str, value, ignore_existing=False) -> None:
 
-        setting = self._dict_to_config_table({section_name: { value_key: value}})
+        converted_settings = self._dict_to_config_table({section_name: { value_key: value}})
+
+        if len(converted_settings) >= 1:
+            setting = converted_settings[0]
 
         if not self.has_value(section_name, value_key):
             self.session.insert_data(setting,Config, ["section_name", "setting_name"])

From 9d19d484392a7b1d9010554a570deb0e14eb4d7d Mon Sep 17 00:00:00 2001
From: Adrian Edwards
Date: Thu, 6 Nov 2025 15:41:39 -0500
Subject: [PATCH 105/105] fix: correct any() parenthesization, load_dict
 signature and variable name, and redaction arguments

Signed-off-by: Adrian Edwards
---
 augur/application/config.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/augur/application/config.py b/augur/application/config.py
index 62531eec50..d40ebb6845 100644
--- a/augur/application/config.py
+++ b/augur/application/config.py
@@ -252,7 +252,7 @@ def is_section_in_config(self, section_name: str) -> bool:
         Returns:
             True if section is in the config, and False if it is not
         """
-        return any(map(lambda s: s.has_section(section_name)), self.config_sources)
+        return any(map(lambda s: s.has_section(section_name), self.config_sources))
 
     def add_value(self, section_name, setting_name, value):
         """Adds or updates a config value. 
@@ -645,12 +645,12 @@ def _dict_to_config_table(json_data:dict): return config_values - def load_dict(cls, data: dict, ignore_existing=False): + def load_dict(self, data: dict, ignore_existing=False): if not self.writable: raise NotWriteableException() for section, config_values in data.items(): - self.create_section(section, values, ignore_existing=ignore_existing) + self.create_section(section, config_values, ignore_existing=ignore_existing) def retrieve_dict(self): # get all the sections in the config table @@ -740,7 +740,7 @@ def add_value(self, section_name: str, value_key: str, value, ignore_existing=Fa self.session.insert_data(setting,Config, ["section_name", "setting_name"]) else: if not ignore_existing: - self.logger.error(f"Could not insert config value '{redact_setting_value(section_name, setting_name, value)}' into section '{section_name}' for key '{value_key}' database because a value already exists there and caller did not specify override") + self.logger.error(f"Could not insert config value '{redact_setting_value(section_name, value_key, value)}' into section '{section_name}' for key '{value_key}' database because a value already exists there and caller did not specify override") return #If setting exists. use raw update to not increase autoincrement update_query = (