From b8c2ca4e271e3ddc413e33553c81d147154849d9 Mon Sep 17 00:00:00 2001
From: Adrian Edwards
Date: Mon, 14 Jul 2025 15:40:59 -0400
Subject: [PATCH 1/4] deduplicate convert_type_of_value

Signed-off-by: Adrian Edwards
---
Review notes (text placed here, between "---" and the diffstat, is not part
of the commit message and is dropped by `git am`):
- Line breaks and Python indentation were restored from a whitespace-collapsed
  copy of this series. Blank-line placement inside hunks is reconstructed from
  the hunk counts -- verify against the original commits before applying.
- NOTE(review): the config.py hunk header (-109,35 +110,6) and the diffstat
  both imply 29 deleted lines, but 30 "-" lines are visible below -- confirm
  which one is spurious before applying.
- The two copies removed here differ only in `if` vs `elif` after an early
  `return` and in one blank line, so replacing both with a single shared
  definition does not change behaviour.

 augur/application/config.py | 30 +-----------------------------
 augur/application/db/lib.py | 31 +------------------------------
 augur/application/util.py   | 30 ++++++++++++++++++++++++++++++
 3 files changed, 32 insertions(+), 59 deletions(-)

diff --git a/augur/application/config.py b/augur/application/config.py
index e3e93302eb..014c8ae6a0 100644
--- a/augur/application/config.py
+++ b/augur/application/config.py
@@ -5,6 +5,7 @@
 import os
 from augur.application.db.models import Config
 from augur.application.db.util import execute_session_query
+from augur.application.util import convert_type_of_value
 
 
 def get_development_flag_from_config():
@@ -109,35 +110,6 @@ def get_development_flag():
 }
 
-
-def convert_type_of_value(config_dict, logger=None):
-
-    data_type = config_dict["type"]
-
-    if data_type == "str" or data_type is None:
-        return config_dict
-
-    elif data_type == "int":
-        config_dict["value"] = int(config_dict["value"])
-
-    elif data_type == "bool":
-        value = config_dict["value"]
-
-        if value.lower() == "false":
-            config_dict["value"] = False
-        else:
-            config_dict["value"] = True
-
-    elif data_type == "float":
-        config_dict["value"] = float(config_dict["value"])
-
-    else:
-        if logger:
-            logger.error(f"Need to add support for {data_type} types to config")
-        else:
-            print(f"Need to add support for {data_type} types to config")
-
-    return config_dict
-
 class AugurConfig():
     from augur.application.db.session import DatabaseSession
 
diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py
index b4004d7734..cb6bc283e5 100644
--- a/augur/application/db/lib.py
+++ b/augur/application/db/lib.py
@@ -14,40 +14,11 @@
 from augur.tasks.util.collection_state import CollectionState
 from augur.application.db import get_session, get_engine
 from augur.application.db.util import execute_session_query
+from augur.application.util import convert_type_of_value
 from augur.application.db.session import remove_duplicates_by_uniques, remove_null_characters_from_list_of_dicts
 
 logger = logging.getLogger("db_lib")
 
-def convert_type_of_value(config_dict, logger=None):
-
-
-    data_type = config_dict["type"]
-
-    if data_type == "str" or data_type is None:
-        return config_dict
-
-    if data_type == "int":
-        config_dict["value"] = int(config_dict["value"])
-
-    elif data_type == "bool":
-        value = config_dict["value"]
-
-        if value.lower() == "false":
-            config_dict["value"] = False
-        else:
-            config_dict["value"] = True
-
-    elif data_type == "float":
-        config_dict["value"] = float(config_dict["value"])
-
-    else:
-        if logger:
-            logger.error(f"Need to add support for {data_type} types to config")
-        else:
-            print(f"Need to add support for {data_type} types to config")
-
-    return config_dict
-
 def get_section(section_name) -> dict:
     """Get a section of data from the config.
 
diff --git a/augur/application/util.py b/augur/application/util.py
index 03e591df98..fa5a63d13d 100644
--- a/augur/application/util.py
+++ b/augur/application/util.py
@@ -25,3 +25,33 @@ def get_all_repos_count(**kwargs):
     result = controller.get_repo_count(source="all", **kwargs)
 
     return result
+
+
+def convert_type_of_value(config_dict, logger=None):
+
+    data_type = config_dict["type"]
+
+    if data_type == "str" or data_type is None:
+        return config_dict
+
+    elif data_type == "int":
+        config_dict["value"] = int(config_dict["value"])
+
+    elif data_type == "bool":
+        value = config_dict["value"]
+
+        if value.lower() == "false":
+            config_dict["value"] = False
+        else:
+            config_dict["value"] = True
+
+    elif data_type == "float":
+        config_dict["value"] = float(config_dict["value"])
+
+    else:
+        if logger:
+            logger.error(f"Need to add support for {data_type} types to config")
+        else:
+            print(f"Need to add support for {data_type} types to config")
+
+    return config_dict
\ No newline at end of file
From b289d662ab08588a022b141b091d79b2cb9d1de3 Mon Sep 17 00:00:00 2001
From: Adrian Edwards
Date: Wed, 23 Jul 2025 16:22:05 -0400
Subject: [PATCH 2/4] move to db.util

Signed-off-by: Adrian Edwards
---
Review note (text here is dropped by `git am`): line breaks and indentation
were restored from a whitespace-collapsed copy. The position of the blank
context lines around "return new_list" in the db/util.py hunk is a
reconstruction -- verify against the original commit before applying.

 augur/application/config.py  |  3 +--
 augur/application/db/lib.py  |  3 +--
 augur/application/db/util.py | 30 ++++++++++++++++++++++++++++++
 augur/application/util.py    | 32 +-------------------------------
 4 files changed, 33 insertions(+), 35 deletions(-)

diff --git a/augur/application/config.py b/augur/application/config.py
index 014c8ae6a0..7ace2befd3 100644
--- a/augur/application/config.py
+++ b/augur/application/config.py
@@ -4,8 +4,7 @@
 from typing import List, Any, Optional
 import os
 from augur.application.db.models import Config
-from augur.application.db.util import execute_session_query
-from augur.application.util import convert_type_of_value
+from augur.application.db.util import execute_session_query, convert_type_of_value
 
 
 def get_development_flag_from_config():
diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py
index cb6bc283e5..5bec1dc8ad 100644
--- a/augur/application/db/lib.py
+++ b/augur/application/db/lib.py
@@ -13,8 +13,7 @@
 from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus, UserGroup, RepoGroup
 from augur.tasks.util.collection_state import CollectionState
 from augur.application.db import get_session, get_engine
-from augur.application.db.util import execute_session_query
-from augur.application.util import convert_type_of_value
+from augur.application.db.util import execute_session_query, convert_type_of_value
 from augur.application.db.session import remove_duplicates_by_uniques, remove_null_characters_from_list_of_dicts
 
 logger = logging.getLogger("db_lib")
diff --git a/augur/application/db/util.py b/augur/application/db/util.py
index 9fa49ab00d..81f24ea6dd 100644
--- a/augur/application/db/util.py
+++ b/augur/application/db/util.py
@@ -58,3 +58,33 @@ def convert_orm_list_to_dict_list(result):
 
     return new_list
 
+
+
+def convert_type_of_value(config_dict, logger=None):
+
+    data_type = config_dict["type"]
+
+    if data_type == "str" or data_type is None:
+        return config_dict
+
+    elif data_type == "int":
+        config_dict["value"] = int(config_dict["value"])
+
+    elif data_type == "bool":
+        value = config_dict["value"]
+
+        if value.lower() == "false":
+            config_dict["value"] = False
+        else:
+            config_dict["value"] = True
+
+    elif data_type == "float":
+        config_dict["value"] = float(config_dict["value"])
+
+    else:
+        if logger:
+            logger.error(f"Need to add support for {data_type} types to config")
+        else:
+            print(f"Need to add support for {data_type} types to config")
+
+    return config_dict
\ No newline at end of file
diff --git a/augur/application/util.py b/augur/application/util.py
index fa5a63d13d..af11d7d367 100644
--- a/augur/application/util.py
+++ b/augur/application/util.py
@@ -24,34 +24,4 @@ def get_all_repos_count(**kwargs):
 
     result = controller.get_repo_count(source="all", **kwargs)
 
-    return result
-
-
-def convert_type_of_value(config_dict, logger=None):
-
-    data_type = config_dict["type"]
-
-    if data_type == "str" or data_type is None:
-        return config_dict
-
-    elif data_type == "int":
-        config_dict["value"] = int(config_dict["value"])
-
-    elif data_type == "bool":
-        value = config_dict["value"]
-
-        if value.lower() == "false":
-            config_dict["value"] = False
-        else:
-            config_dict["value"] = True
-
-    elif data_type == "float":
-        config_dict["value"] = float(config_dict["value"])
-
-    else:
-        if logger:
-            logger.error(f"Need to add support for {data_type} types to config")
-        else:
-            print(f"Need to add support for {data_type} types to config")
-
-    return config_dict
\ No newline at end of file
+    return result
\ No newline at end of file
From b17d8f977dd80d994b4ffff1bb938584ef84226f Mon Sep 17 00:00:00 2001
From: Ulincsys
Date: Tue, 22 Jul 2025 18:30:47 -0500
Subject: [PATCH 3/4] Try a new version of the TZdata fix

Signed-off-by: Ulincsys
---
Review fixup (notes here are dropped by `git am`):
- int(tzdata) is now wrapped in try/except ValueError. This branch handles
  every single-record insert failure, so a timestamp whose last
  space-separated segment is not an integer offset used to raise ValueError
  from inside the handler and hide the original insert error `e`.
- The "offset is already valid -> re-raise e" case is now an explicit guard
  clause instead of an `else: raise e` attached to the replacement branch.
- `000` in postgres_valid_timezones is written as `0` (same value).
- The hunk count and diffstat below, and the second hunk offset in PATCH 4/4,
  were recomputed for the 8 extra lines; the blob ids on the "index" lines
  predate this fixup.
- Indentation depth inside facade_bulk_insert_commits is reconstructed from a
  whitespace-collapsed copy -- verify against the target file.
- NOTE(review): both timestamp fields are overwritten with the value derived
  from the author timestamp only -- confirm that discarding the committer
  timestamp is intended.

 augur/application/db/lib.py | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py
index 5bec1dc8ad..5fe0443967 100644
--- a/augur/application/db/lib.py
+++ b/augur/application/db/lib.py
@@ -225,18 +225,45 @@ def facade_bulk_insert_commits(logger, records):
                 facade_bulk_insert_commits(logger, firsthalfRecords)
                 facade_bulk_insert_commits(logger, secondhalfRecords)
 
-            elif len(records) == 1 and isinstance(e,DataError) and "time zone displacement" in f"{e}":
+            elif len(records) == 1:
                 commit_record = records[0]
                 #replace incomprehensible dates with epoch.
                 #2021-10-11 11:57:46 -0500
 
                 # placeholder_date = "1970-01-01 00:00:15 -0500"
                 placeholder_date = commit_record['author_timestamp']
+
+                postgres_valid_timezones = {
+                    -1200, -1100, -1000, -930, -900, -800, -700,
+                    -600, -500, -400, -300, -230, -200, -100, 0,
+                    100, 200, 300, 330, 400, 430, 500, 530, 545, 600,
+                    630, 700, 800, 845, 900, 930, 1000, 1030, 1100, 1200,
+                    1245, 1300, 1400
+                }
 
                 # Reconstruct timezone portion of the date string to UTC
-                placeholder_date = re.split("[-+]", placeholder_date)
-                placeholder_date.pop()
-                placeholder_date = "-".join(placeholder_date) + "+0000"
+                placeholder_date_segments = re.split(" ", placeholder_date)
+                tzdata = placeholder_date_segments.pop()
+
+                if ":" in tzdata:
+                    tzdata = tzdata.replace(":", "")
+
+                try:
+                    tz_offset = int(tzdata)
+                except ValueError:
+                    # Last segment is not a numeric UTC offset, so this is not
+                    # a timezone problem; surface the original insert error.
+                    raise e
+
+                if tz_offset in postgres_valid_timezones:
+                    # Offset is already valid: the insert failed for another reason.
+                    raise e
+
+                tzdata = "+0000"
+
+                placeholder_date_segments.append(tzdata)
+
+                placeholder_date = " ".join(placeholder_date_segments)
 
                 #Check for improper utc timezone offset
                 #UTC timezone offset should be between -14:00 and +14:00
From aeaf09f5d6debfcc9f4cbc5bae2cf04fa9ab55b5 Mon Sep 17 00:00:00 2001
From: Ulincsys
Date: Sat, 16 Aug 2025 11:25:31 -0500
Subject: [PATCH 4/4] Fix KeyError in invalid timezone handling

The keys on this dictionary are defined in:
analyzecommit.generate_commit_record()

- Update reference to use proper 'cmt_author_timestamp' key
- Add warning log when replacing TZdata to show commit hash

Signed-off-by: Ulincsys
---
 augur/application/db/lib.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py
index 5fe0443967..a82c97dd66 100644
--- a/augur/application/db/lib.py
+++ b/augur/application/db/lib.py
@@ -231,7 +231,7 @@ def facade_bulk_insert_commits(logger, records):
                 #2021-10-11 11:57:46 -0500
 
                 # placeholder_date = "1970-01-01 00:00:15 -0500"
-                placeholder_date = commit_record['author_timestamp']
+                placeholder_date = commit_record['cmt_author_timestamp']
 
                 postgres_valid_timezones = {
                     -1200, -1100, -1000, -930, -900, -800, -700,
@@ -268,8 +268,11 @@ def facade_bulk_insert_commits(logger, records):
                 #Check for improper utc timezone offset
                 #UTC timezone offset should be between -14:00 and +14:00
 
-                commit_record['author_timestamp'] = placeholder_date
-                commit_record['committer_timestamp'] = placeholder_date
+                # analyzecommit.generate_commit_record() defines the keys on the commit_record dictionary
+                commit_record['cmt_author_timestamp'] = placeholder_date
+                commit_record['cmt_committer_timestamp'] = placeholder_date
+
+                logger.warning(f"commit with invalid timezone set to UTC: {commit_record['cmt_commit_hash']}")
 
                 session.execute(
                     s.insert(Commit),