diff --git a/augur/application/config.py b/augur/application/config.py index e3e93302eb..7ace2befd3 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -4,7 +4,7 @@ from typing import List, Any, Optional import os from augur.application.db.models import Config -from augur.application.db.util import execute_session_query +from augur.application.db.util import execute_session_query, convert_type_of_value def get_development_flag_from_config(): @@ -109,35 +109,6 @@ def get_development_flag(): } -def convert_type_of_value(config_dict, logger=None): - - data_type = config_dict["type"] - - if data_type == "str" or data_type is None: - return config_dict - - elif data_type == "int": - config_dict["value"] = int(config_dict["value"]) - - elif data_type == "bool": - value = config_dict["value"] - - if value.lower() == "false": - config_dict["value"] = False - else: - config_dict["value"] = True - - elif data_type == "float": - config_dict["value"] = float(config_dict["value"]) - - else: - if logger: - logger.error(f"Need to add support for {data_type} types to config") - else: - print(f"Need to add support for {data_type} types to config") - - return config_dict - class AugurConfig(): from augur.application.db.session import DatabaseSession diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index b4004d7734..a82c97dd66 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -13,41 +13,11 @@ from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus, UserGroup, RepoGroup from augur.tasks.util.collection_state import CollectionState from augur.application.db import get_session, get_engine -from augur.application.db.util import execute_session_query +from augur.application.db.util import execute_session_query, convert_type_of_value from augur.application.db.session import remove_duplicates_by_uniques, remove_null_characters_from_list_of_dicts logger = logging.getLogger("db_lib") -def convert_type_of_value(config_dict, logger=None): - - - data_type = config_dict["type"] - - if data_type == "str" or data_type is None: - return config_dict - - if data_type == "int": - config_dict["value"] = int(config_dict["value"]) - - elif data_type == "bool": - value = config_dict["value"] - - if value.lower() == "false": - config_dict["value"] = False - else: - config_dict["value"] = True - - elif data_type == "float": - config_dict["value"] = float(config_dict["value"]) - - else: - if logger: - logger.error(f"Need to add support for {data_type} types to config") - else: - print(f"Need to add support for {data_type} types to config") - - return config_dict - def get_section(section_name) -> dict: """Get a section of data from the config. @@ -255,24 +225,46 @@ def facade_bulk_insert_commits(logger, records): facade_bulk_insert_commits(logger, firsthalfRecords) facade_bulk_insert_commits(logger, secondhalfRecords) - elif len(records) == 1 and isinstance(e,DataError) and "time zone displacement" in f"{e}": + elif len(records) == 1: commit_record = records[0] #replace incomprehensible dates with epoch. #2021-10-11 11:57:46 -0500 # placeholder_date = "1970-01-01 00:00:15 -0500" - placeholder_date = commit_record['author_timestamp'] + placeholder_date = commit_record['cmt_author_timestamp'] + + postgres_valid_timezones = { + -1200, -1100, -1000, -930, -900, -800, -700, + -600, -500, -400, -300, -230, -200, -100, 000, + 100, 200, 300, 330, 400, 430, 500, 530, 545, 600, + 630, 700, 800, 845, 900, 930, 1000, 1030, 1100, 1200, + 1245, 1300, 1400 + } # Reconstruct timezone portion of the date string to UTC - placeholder_date = re.split("[-+]", placeholder_date) - placeholder_date.pop() - placeholder_date = "-".join(placeholder_date) + "+0000" + placeholder_date_segments = re.split(" ", placeholder_date) + tzdata = placeholder_date_segments.pop() + + if ":" in tzdata: + tzdata = tzdata.replace(":", "") + + if int(tzdata) not in postgres_valid_timezones: + tzdata = "+0000" + else: + raise e + + placeholder_date_segments.append(tzdata) + + placeholder_date = " ".join(placeholder_date_segments) #Check for improper utc timezone offset #UTC timezone offset should be between -14:00 and +14:00 - commit_record['author_timestamp'] = placeholder_date - commit_record['committer_timestamp'] = placeholder_date + # analyzecommit.generate_commit_record() defines the keys on the commit_record dictionary + commit_record['cmt_author_timestamp'] = placeholder_date + commit_record['cmt_committer_timestamp'] = placeholder_date + + logger.warning(f"commit with invalid timezone set to UTC: {commit_record['cmt_commit_hash']}") session.execute( s.insert(Commit), diff --git a/augur/application/db/util.py b/augur/application/db/util.py index 9fa49ab00d..81f24ea6dd 100644 --- a/augur/application/db/util.py +++ b/augur/application/db/util.py @@ -58,3 +58,33 @@ def convert_orm_list_to_dict_list(result): return new_list + + +def convert_type_of_value(config_dict, logger=None): + + data_type = config_dict["type"] + + if data_type == "str" or data_type is None: + return config_dict + + elif data_type == "int": + config_dict["value"] = int(config_dict["value"]) + + elif data_type == "bool": + value = config_dict["value"] + + if value.lower() == "false": + config_dict["value"] = False + else: + config_dict["value"] = True + + elif data_type == "float": + config_dict["value"] = float(config_dict["value"]) + + else: + if logger: + logger.error(f"Need to add support for {data_type} types to config") + else: + print(f"Need to add support for {data_type} types to config") + + return config_dict \ No newline at end of file diff --git a/augur/application/util.py b/augur/application/util.py index 03e591df98..af11d7d367 100644 --- a/augur/application/util.py +++ b/augur/application/util.py @@ -24,4 +24,4 @@ def get_all_repos_count(**kwargs): result = controller.get_repo_count(source="all", **kwargs) - return result + return result \ No newline at end of file