From 99fc30cf5e9e08cfb8464696e3a400a331e61d9d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 24 Oct 2025 10:45:06 -0400 Subject: [PATCH 001/104] remove unused functions Signed-off-by: Adrian Edwards --- augur/tasks/util/collection_util.py | 95 ----------------------------- 1 file changed, 95 deletions(-) diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index 28489d63c8..66958e2fe9 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -18,101 +18,6 @@ from augur.tasks.util.collection_state import CollectionState -def get_list_of_all_users(): - #Get a list of all users. - query = s.sql.text(""" - SELECT - user_id - FROM augur_operations.users - """) - - users = execute_sql(query).fetchall() - return users - - -def get_required_conditions_for_core_repos(allow_collected_before = False, days_until_collect_again = 1): - - if not allow_collected_before: - condition_concat_string = f""" - core_status='{str(CollectionState.PENDING.value)}' AND core_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.core_data_last_collected IS NULL - AND core_status!='{str(CollectionState.COLLECTING.value)}' - """ - else: - condition_concat_string = f""" - core_status='Success' AND core_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.core_data_last_collected IS NOT NULL - AND core_status!='{str(CollectionState.COLLECTING.value)}' - AND core_data_last_collected <= NOW() - INTERVAL '{days_until_collect_again} DAYS' - """ - - return condition_concat_string - -def get_required_conditions_for_secondary_repos(allow_collected_before = False, days_until_collect_again = 1): - - if not allow_collected_before: - condition_concat_string = f""" - secondary_status='{str(CollectionState.PENDING.value)}' AND secondary_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.core_status = '{str(CollectionState.SUCCESS.value)}' - AND augur_operations.collection_status.secondary_data_last_collected IS NULL - AND secondary_status!='{str(CollectionState.COLLECTING.value)}' - """ - else: - condition_concat_string = f""" - secondary_status='Success' AND secondary_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.secondary_data_last_collected IS NOT NULL - AND augur_operations.collection_status.core_status = '{str(CollectionState.SUCCESS.value)}' - AND secondary_status!='{str(CollectionState.COLLECTING.value)}' - AND secondary_data_last_collected <= NOW() - INTERVAL '{days_until_collect_again} DAYS' - """ - - return condition_concat_string - -def get_required_conditions_for_facade_repos(allow_collected_before = False, days_until_collect_again = 1): - - if not allow_collected_before: - condition_concat_string = f""" - facade_status='{str(CollectionState.UPDATE.value)}' AND facade_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.facade_status != '{str(CollectionState.PENDING.value)}' - AND augur_operations.collection_status.facade_status != '{str(CollectionState.FAILED_CLONE.value)}' - AND augur_operations.collection_status.facade_status != '{str(CollectionState.INITIALIZING.value)}' - AND augur_operations.collection_status.facade_data_last_collected IS NULL - AND facade_status!='{str(CollectionState.COLLECTING.value)}' - """ - else: - condition_concat_string = f""" - facade_status='Success' AND facade_status!='{str(CollectionState.ERROR.value)}' - AND 
augur_operations.collection_status.facade_data_last_collected IS NOT NULL - AND augur_operations.collection_status.facade_status != '{str(CollectionState.PENDING.value)}' - AND augur_operations.collection_status.facade_status != '{str(CollectionState.FAILED_CLONE.value)}' - AND augur_operations.collection_status.facade_status != '{str(CollectionState.INITIALIZING.value)}' - AND facade_status!='{str(CollectionState.COLLECTING.value)}' - AND facade_data_last_collected <= NOW() - INTERVAL '{days_until_collect_again} DAYS' - """ - - return condition_concat_string - -def get_required_conditions_for_ml_repos(allow_collected_before = False, days_until_collect_again = 1): - - if not allow_collected_before: - condition_concat_string = f""" - ml_status='{str(CollectionState.PENDING.value)}' AND ml_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.secondary_status = '{str(CollectionState.SUCCESS.value)}' - AND augur_operations.collection_status.ml_data_last_collected IS NULL - AND ml_status!='{str(CollectionState.COLLECTING.value)}' - """ - else: - condition_concat_string = f""" - ml_status='Success' AND ml_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.ml_data_last_collected IS NOT NULL - AND ml_status!='{str(CollectionState.COLLECTING.value)}' - AND ml_data_last_collected <= NOW() - INTERVAL '{days_until_collect_again} DAYS' - """ - - return condition_concat_string - - - class CollectionRequest: def __init__(self,name,phases,max_repo = 10,days_until_collect_again = 1, gitlab_phases=None): self.name = name From e080f6112a91a00be1a49d68b8207930a7bfdc4d Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 28 Oct 2025 18:01:15 -0500 Subject: [PATCH 002/104] add toggle option for messages Signed-off-by: Isaac Milarsky --- augur/application/config.py | 3 ++- augur/tasks/git/facade_tasks.py | 5 +++-- augur/tasks/git/util/facade_worker/facade_worker/config.py | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 2cc6f65cdb..776bce63d1 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -52,7 +52,8 @@ def get_development_flag(): "rebuild_caches": 1, "run_analysis": 1, "run_facade_contributors": 1, - "facade_contributor_full_recollect": 0 + "facade_contributor_full_recollect": 0, + "commit_messages": 1, }, "Server": { "cache_expire": "3600", diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index 5baaed20d4..826fc023e1 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -315,7 +315,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: facade_bulk_insert_commits(logger, pendingCommitRecordsToInsert) pendingCommitRecordsToInsert = [] - if commit_msg: + if commit_msg and facade_helper.commit_messages: pendingCommitMessageRecordsToInsert.append(commit_msg) if len(pendingCommitMessageRecordsToInsert) >= 1000: @@ -446,7 +446,8 @@ def generate_analysis_sequence(logger,repo_git, facade_helper): analysis_sequence.append(trim_commits_post_analysis_facade_task.si(repo_git)) - analysis_sequence.append(facade_fetch_missing_commit_messages.si(repo_git)) + if facade_helper.commit_messages: + analysis_sequence.append(facade_fetch_missing_commit_messages.si(repo_git)) analysis_sequence.append(facade_analysis_end_facade_task.si()) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py 
index f060b34390..49bbca7e88 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -128,6 +128,7 @@ def __init__(self,logger: Logger): self.multithreaded = worker_options["multithreaded"] self.create_xlsx_summary_files = worker_options["create_xlsx_summary_files"] self.facade_contributor_full_recollect = worker_options["facade_contributor_full_recollect"] + self.commit_messages = worker_options["commit_messages"] self.tool_source = "Facade" self.data_source = "Git Log" From d20c672e5b620b643f606701258789db3628d265 Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Wed, 12 Nov 2025 01:24:04 +0000 Subject: [PATCH 003/104] feat: Add Topic Modeling database schema tables Add two new tables and ORM models for Topic Modeling versioning system: 1. topic_model_meta table (Migration 35): - Stores metadata for each trained topic model - 21 fields including model_id (UUID PK), repo_id (FK), training parameters, quality metrics (coherence_score, topic_diversity), and visualization data - Enables model versioning, comparison, and intelligent retraining 2. topic_model_event table (Migration 36): - Audit log for topic modeling events - Tracks training lifecycle: started, completed, retrain triggered, etc. - Provides observability for automated and manual training operations 3. TopicModelMeta ORM model: - SQLAlchemy model definition for topic_model_meta table - Relationships and field mappings for application layer These schema changes support the Topic Modeling feature that enables: - Automated NMF-based topic extraction from repository messages - Model version management and comparison - Intelligent retraining based on data/quality changes - Storage optimization via REPLACE strategy for automatic runs Related: #3207 Signed-off-by: Xiaoha --- augur/application/db/models/__init__.py | 1 + augur/application/db/models/augur_data.py | 104 ++++++++++++++++++ .../35_create_topic_model_meta_table.py | 69 ++++++++++++ .../versions/36_add_topic_model_event.py | 63 +++++++++++ 4 files changed, 237 insertions(+) create mode 100644 augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py create mode 100644 augur/application/schema/alembic/versions/36_add_topic_model_event.py diff --git a/augur/application/db/models/__init__.py b/augur/application/db/models/__init__.py index 013f22ab42..f729f0ac1e 100644 --- a/augur/application/db/models/__init__.py +++ b/augur/application/db/models/__init__.py @@ -39,6 +39,7 @@ RepoSbomScan, RepoStat, RepoTopic, + TopicModelMeta, CommitCommentRef, CommitParent, DiscourseInsight, diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index c80077d9b6..9751cfb5f4 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -3601,3 +3601,107 @@ class RepoClone(Base): clone_data_timestamp = Column(TIMESTAMP(precision=6)) repo = relationship("Repo") + + +class TopicModelMeta(Base): + __tablename__ = "topic_model_meta" + __table_args__ = {"schema": "augur_data"} + + model_id = Column( + UUID(as_uuid=True), + primary_key=True, + server_default=text("gen_random_uuid()"), + comment="Unique identifier for the topic model" + ) + repo_id = Column( + ForeignKey("augur_data.repo.repo_id"), + comment="Repository this model was trained on" + ) + model_method = Column( + String, + nullable=False, + comment="Method used for topic modeling (e.g., 'NMF_COUNT', 'LDA_TFIDF')" + ) + num_topics = Column( + Integer, + 
nullable=False, + comment="Number of topics in the model" + ) + num_words_per_topic = Column( + Integer, + nullable=False, + comment="Number of words per topic" + ) + training_parameters = Column( + JSON, + nullable=False, + comment="JSON object containing training parameters" + ) + model_file_paths = Column( + JSON, + nullable=False, + comment="JSON object containing paths to model artifacts" + ) + parameters_hash = Column( + String, + nullable=False, + comment="Hash of parameters for deduplication" + ) + coherence_score = Column( + Float, + nullable=False, + server_default=text("0.0"), + comment="Coherence score of the model" + ) + perplexity_score = Column( + Float, + nullable=False, + server_default=text("0.0"), + comment="Perplexity score of the model" + ) + topic_diversity = Column( + Float, + nullable=False, + server_default=text("0.0"), + comment="Topic diversity score" + ) + quality = Column( + JSON, + nullable=False, + server_default=text("'{}'::jsonb"), + comment="Quality metrics" + ) + training_message_count = Column( + BigInteger, + nullable=False, + comment="Number of messages used for training" + ) + data_fingerprint = Column( + JSON, + nullable=False, + comment="Fingerprint of training data" + ) + visualization_data = Column( + JSON, + nullable=True, + comment="JSON object containing visualization data for the model" + ) + training_start_time = Column( + TIMESTAMP(), + nullable=False, + comment="When training started" + ) + training_end_time = Column( + TIMESTAMP(), + nullable=False, + comment="When training ended" + ) + tool_source = Column(String, comment="Standard Augur Metadata") + tool_version = Column(String, comment="Standard Augur Metadata") + data_source = Column(String, comment="Standard Augur Metadata") + data_collection_date = Column( + TIMESTAMP(precision=0), + server_default=text("CURRENT_TIMESTAMP") + ) + + repo = relationship("Repo") diff --git a/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py new file mode 100644 index 0000000000..9e4a00c3cc --- /dev/null +++ b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py @@ -0,0 +1,69 @@ +"""Create topic_model_meta table + +Revision ID: 35 +Revises: 34 +Create Date: 2024-08-28 20:30:00.000000 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
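# For orientation, a hedged sketch of the kind of query the versioning table
# created in upgrade() below is meant to answer (e.g. the most recent model per
# repository). This is illustrative only; PostgreSQL's DISTINCT ON is assumed
# since the migration targets the augur_data Postgres schema:
#
#   SELECT DISTINCT ON (repo_id)
#          repo_id, model_id, coherence_score, topic_diversity, training_end_time
#   FROM augur_data.topic_model_meta
#   ORDER BY repo_id, training_end_time DESC;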
+revision = '35' +down_revision = '34' +branch_labels = None +depends_on = None + + +def upgrade(): + # Create topic_model_meta table based on ER diagram with NOT NULL constraints + op.create_table('topic_model_meta', + # Primary key + sa.Column('model_id', sa.UUID(), server_default=sa.text('gen_random_uuid()'), nullable=False), + + # Foreign key to repo (nullable for multi-repo training) + sa.Column('repo_id', sa.Integer(), nullable=True), + + # Model metadata (all NOT NULL as requested) + sa.Column('model_method', sa.String(), nullable=False), + sa.Column('num_topics', sa.Integer(), nullable=False), + sa.Column('num_words_per_topic', sa.Integer(), nullable=False), + + # Parameters and configuration (NOT NULL) + sa.Column('training_parameters', postgresql.JSONB(), nullable=False), + sa.Column('model_file_paths', postgresql.JSONB(), nullable=False), + sa.Column('parameters_hash', sa.String(), nullable=False), + + # Quality metrics (NOT NULL, but can use default values) + sa.Column('coherence_score', sa.Float(), nullable=False, server_default=sa.text('0.0')), + sa.Column('perplexity_score', sa.Float(), nullable=False, server_default=sa.text('0.0')), + sa.Column('topic_diversity', sa.Float(), nullable=False, server_default=sa.text('0.0')), + sa.Column('quality', postgresql.JSONB(), nullable=False, server_default=sa.text("'{}'::jsonb")), + + # Training metadata (NOT NULL) + sa.Column('training_message_count', sa.BigInteger(), nullable=False), + sa.Column('data_fingerprint', postgresql.JSONB(), nullable=False), + + # Visualization data (optional) + sa.Column('visualization_data', postgresql.JSONB(), nullable=True), + + # Timestamps (NOT NULL with defaults) + sa.Column('training_start_time', sa.TIMESTAMP(), nullable=False), + sa.Column('training_end_time', sa.TIMESTAMP(), nullable=False), + sa.Column('data_collection_date', sa.TIMESTAMP(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + + # Standard Augur metadata (NOT NULL) + sa.Column('tool_source', sa.String(), nullable=False), + sa.Column('tool_version', sa.String(), nullable=False), + sa.Column('data_source', sa.String(), nullable=False), + + # Constraints + sa.ForeignKeyConstraint(['repo_id'], ['augur_data.repo.repo_id'], ), + sa.PrimaryKeyConstraint('model_id'), + schema='augur_data' + ) + + +def downgrade(): + op.drop_table('topic_model_meta', schema='augur_data') \ No newline at end of file diff --git a/augur/application/schema/alembic/versions/36_add_topic_model_event.py b/augur/application/schema/alembic/versions/36_add_topic_model_event.py new file mode 100644 index 0000000000..8edcd90609 --- /dev/null +++ b/augur/application/schema/alembic/versions/36_add_topic_model_event.py @@ -0,0 +1,63 @@ +""" +Create topic_model_event table for DB event logging + +Revision ID: 36 +Revises: 35 +Create Date: 2025-08-21 +""" +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
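# For orientation, a hedged sketch of the lookups the indexes created in
# upgrade() below (ix_tme_repo_ts and ix_tme_event) are meant to serve; the
# repo_id value here is only a placeholder:
#
#   SELECT event, level, ts, payload
#   FROM augur_data.topic_model_event
#   WHERE repo_id = 42
#   ORDER BY ts DESC
#   LIMIT 20;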
+revision = "36" +down_revision = "35" +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_table( + "topic_model_event", + sa.Column("event_id", sa.BigInteger(), primary_key=True), + sa.Column( + "ts", + sa.TIMESTAMP(), + server_default=sa.text("CURRENT_TIMESTAMP"), + nullable=False, + ), + sa.Column("repo_id", sa.Integer(), nullable=True), + sa.Column("model_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("event", sa.Text(), nullable=False), + sa.Column("level", sa.Text(), server_default=sa.text("'INFO'"), nullable=False), + sa.Column("payload", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.ForeignKeyConstraint( + ["repo_id"], ["augur_data.repo.repo_id"], name="fk_tme_repo_id" + ), + sa.ForeignKeyConstraint( + ["model_id"], + ["augur_data.topic_model_meta.model_id"], + name="fk_tme_model_id", + ondelete="SET NULL", + ), + schema="augur_data", + ) + op.create_index( + "ix_tme_repo_ts", "topic_model_event", ["repo_id", "ts"], schema="augur_data" + ) + op.create_index("ix_tme_event", "topic_model_event", ["event"], schema="augur_data") + op.create_index( + "ix_tme_payload", + "topic_model_event", + [sa.text("(payload)")], + unique=False, + schema="augur_data", + postgresql_using="gin", + ) + + +def downgrade(): + op.drop_index("ix_tme_payload", table_name="topic_model_event", schema="augur_data") + op.drop_index("ix_tme_event", table_name="topic_model_event", schema="augur_data") + op.drop_index("ix_tme_repo_ts", table_name="topic_model_event", schema="augur_data") + op.drop_table("topic_model_event", schema="augur_data") From d40e9acb91657b627cede8ca7e7c56e04de6fb0b Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 12 Nov 2025 11:14:32 -0500 Subject: [PATCH 004/104] fix typos in the tests folder. 
Signed-off-by: Shlok Gilda --- Makefile | 2 +- pyproject.toml | 2 +- .../test_cli/test_add_cli_repos.py | 2 +- .../test_cli/test_cli_functionality.py | 0 .../test_config/test_config.py | 0 .../test_models/test_augur_data/test_repo.py | 2 +- .../test_augur_data/test_repo_group.py | 2 +- .../test_augur_operations/test_user.py | 2 +- .../test_augur_operations/test_user_group.py | 2 +- .../test_augur_operations/test_user_repo.py | 2 +- .../test_db/test_session.py | 0 .../test_repo_load_controller/helper.py | 0 .../test_adding_orgs.py | 2 +- .../test_adding_repos.py | 2 +- .../test_helper_functions.py | 2 +- .../test_repo_load_controller/util.py | 0 .../test_github_random_key_auth.py | 0 .../test_key_auth/test_random_key_auth.py | 0 .../test_github_api_key_handler.py | 0 .../test_paginators/test_github_paginator.py | 0 .../test_redis/test_redis_list.py | 0 .../test_util/test_worker_util.py | 0 .../test_endpoints.py | 2 +- tests/test_workers/test_set_up_fixtures.py | 6 +++--- .../bad_Data.json | 0 .../contributors.json | 0 .../contributors_un_enriched.json | 0 .../standard_enrich_cntrb_id_data.json | 0 .../test_enrich_cntrb_id.py | 2 +- .../test_enrich_data_primary_keys.py | 20 +++++++++---------- .../util_persistence.py} | 8 ++++---- 31 files changed, 30 insertions(+), 30 deletions(-) rename tests/{test_applicaton => test_application}/test_cli/test_add_cli_repos.py (98%) rename tests/{test_applicaton => test_application}/test_cli/test_cli_functionality.py (100%) rename tests/{test_applicaton => test_application}/test_config/test_config.py (100%) rename tests/{test_applicaton => test_application}/test_db/test_models/test_augur_data/test_repo.py (98%) rename tests/{test_applicaton => test_application}/test_db/test_models/test_augur_data/test_repo_group.py (96%) rename tests/{test_applicaton => test_application}/test_db/test_models/test_augur_operations/test_user.py (98%) rename tests/{test_applicaton => test_application}/test_db/test_models/test_augur_operations/test_user_group.py (99%) rename tests/{test_applicaton => test_application}/test_db/test_models/test_augur_operations/test_user_repo.py (99%) rename tests/{test_applicaton => test_application}/test_db/test_session.py (100%) rename tests/{test_applicaton => test_application}/test_repo_load_controller/helper.py (100%) rename tests/{test_applicaton => test_application}/test_repo_load_controller/test_adding_orgs.py (80%) rename tests/{test_applicaton => test_application}/test_repo_load_controller/test_adding_repos.py (99%) rename tests/{test_applicaton => test_application}/test_repo_load_controller/test_helper_functions.py (99%) rename tests/{test_applicaton => test_application}/test_repo_load_controller/util.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_key_auth/test_github_random_key_auth.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_key_auth/test_random_key_auth.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_key_handler/test_github_api_key_handler.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_paginators/test_github_paginator.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_redis/test_redis_list.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_util/test_worker_util.py (100%) rename tests/test_workers/{worker_persistance => worker_persistence}/bad_Data.json (100%) rename tests/test_workers/{worker_persistance => 
worker_persistence}/contributors.json (100%) rename tests/test_workers/{worker_persistance => worker_persistence}/contributors_un_enriched.json (100%) rename tests/test_workers/{worker_persistance => worker_persistence}/standard_enrich_cntrb_id_data.json (100%) rename tests/test_workers/{worker_persistance => worker_persistence}/test_enrich_cntrb_id.py (97%) rename tests/test_workers/{worker_persistance => worker_persistence}/test_enrich_data_primary_keys.py (94%) rename tests/test_workers/{worker_persistance/util_persistance.py => worker_persistence/util_persistence.py} (80%) diff --git a/Makefile b/Makefile index 4fe926edc4..c00d789faa 100644 --- a/Makefile +++ b/Makefile @@ -99,7 +99,7 @@ test-data: test: # @ pytest tests/test_tasks/test_github_tasks/ @ python3 tests/start_server.py - @ pytest tests/test_metrics/test_metrics_functionality/ tests/test_routes/test_api_functionality/ tests/test_tasks/ tests/test_applicaton/ + @ pytest tests/test_metrics/test_metrics_functionality/ tests/test_routes/test_api_functionality/ tests/test_tasks/ tests/test_application/ @ python3 tests/stop_server.py test-api: diff --git a/pyproject.toml b/pyproject.toml index ddaed4301d..529771dca0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -180,7 +180,7 @@ legacy_tox_ini = """ metric-routes: python tests/test_routes/runner.py workers: pytest tests/test_workers/ classes: pytest tests/test_classes/ - worker-persistance: pytest test/test_workers/worker_persistance/ + worker-persistence: pytest test/test_workers/worker_persistence/ [pytest] addopts = -ra -s diff --git a/tests/test_applicaton/test_cli/test_add_cli_repos.py b/tests/test_application/test_cli/test_add_cli_repos.py similarity index 98% rename from tests/test_applicaton/test_cli/test_add_cli_repos.py rename to tests/test_application/test_cli/test_add_cli_repos.py index 42f342d2bd..7905308e0a 100644 --- a/tests/test_applicaton/test_cli/test_add_cli_repos.py +++ b/tests/test_application/test_cli/test_add_cli_repos.py @@ -1,7 +1,7 @@ import pytest import logging -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.util.repo_load_controller import RepoLoadController, CLI_USER_ID diff --git a/tests/test_applicaton/test_cli/test_cli_functionality.py b/tests/test_application/test_cli/test_cli_functionality.py similarity index 100% rename from tests/test_applicaton/test_cli/test_cli_functionality.py rename to tests/test_application/test_cli/test_cli_functionality.py diff --git a/tests/test_applicaton/test_config/test_config.py b/tests/test_application/test_config/test_config.py similarity index 100% rename from tests/test_applicaton/test_config/test_config.py rename to tests/test_application/test_config/test_config.py diff --git a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py b/tests/test_application/test_db/test_models/test_augur_data/test_repo.py similarity index 98% rename from tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py rename to tests/test_application/test_db/test_models/test_augur_data/test_repo.py index dd1ef44b79..fffe3e13ff 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py +++ b/tests/test_application/test_db/test_models/test_augur_data/test_repo.py @@ -5,7 +5,7 @@ from augur.application.db.session import DatabaseSession from augur.tasks.github.util.github_task_session import 
GithubTaskSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import Repo logger = logging.getLogger(__name__) diff --git a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo_group.py b/tests/test_application/test_db/test_models/test_augur_data/test_repo_group.py similarity index 96% rename from tests/test_applicaton/test_db/test_models/test_augur_data/test_repo_group.py rename to tests/test_application/test_db/test_models/test_augur_data/test_repo_group.py index 4367542db8..4c144bb8e1 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo_group.py +++ b/tests/test_application/test_db/test_models/test_augur_data/test_repo_group.py @@ -3,7 +3,7 @@ import sqlalchemy as s from augur.application.db.session import DatabaseSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import RepoGroup logger = logging.getLogger(__name__) diff --git a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user.py b/tests/test_application/test_db/test_models/test_augur_operations/test_user.py similarity index 98% rename from tests/test_applicaton/test_db/test_models/test_augur_operations/test_user.py rename to tests/test_application/test_db/test_models/test_augur_operations/test_user.py index 6011405b26..b5c0db623a 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user.py +++ b/tests/test_application/test_db/test_models/test_augur_operations/test_user.py @@ -5,7 +5,7 @@ from augur.application.db.session import DatabaseSession from augur.tasks.github.util.github_task_session import GithubTaskSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import User diff --git a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_group.py b/tests/test_application/test_db/test_models/test_augur_operations/test_user_group.py similarity index 99% rename from tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_group.py rename to tests/test_application/test_db/test_models/test_augur_operations/test_user_group.py index 70fa11ecb0..1bcac18aed 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_group.py +++ b/tests/test_application/test_db/test_models/test_augur_operations/test_user_group.py @@ -3,7 +3,7 @@ import sqlalchemy as s from augur.application.db.session import DatabaseSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import UserGroup logger = logging.getLogger(__name__) diff --git a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py b/tests/test_application/test_db/test_models/test_augur_operations/test_user_repo.py similarity index 99% rename from tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py rename to tests/test_application/test_db/test_models/test_augur_operations/test_user_repo.py index 4b288cbabb..da97fb4344 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py +++ 
b/tests/test_application/test_db/test_models/test_augur_operations/test_user_repo.py @@ -5,7 +5,7 @@ from augur.application.db.session import DatabaseSession from augur.tasks.github.util.github_task_session import GithubTaskSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import UserRepo logger = logging.getLogger(__name__) diff --git a/tests/test_applicaton/test_db/test_session.py b/tests/test_application/test_db/test_session.py similarity index 100% rename from tests/test_applicaton/test_db/test_session.py rename to tests/test_application/test_db/test_session.py diff --git a/tests/test_applicaton/test_repo_load_controller/helper.py b/tests/test_application/test_repo_load_controller/helper.py similarity index 100% rename from tests/test_applicaton/test_repo_load_controller/helper.py rename to tests/test_application/test_repo_load_controller/helper.py diff --git a/tests/test_applicaton/test_repo_load_controller/test_adding_orgs.py b/tests/test_application/test_repo_load_controller/test_adding_orgs.py similarity index 80% rename from tests/test_applicaton/test_repo_load_controller/test_adding_orgs.py rename to tests/test_application/test_repo_load_controller/test_adding_orgs.py index 1b8effe568..f053959628 100644 --- a/tests/test_applicaton/test_repo_load_controller/test_adding_orgs.py +++ b/tests/test_application/test_repo_load_controller/test_adding_orgs.py @@ -1,7 +1,7 @@ import pytest import logging -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.util.repo_load_controller import RepoLoadController, DEFAULT_REPO_GROUP_IDS, CLI_USER_ID diff --git a/tests/test_applicaton/test_repo_load_controller/test_adding_repos.py b/tests/test_application/test_repo_load_controller/test_adding_repos.py similarity index 99% rename from tests/test_applicaton/test_repo_load_controller/test_adding_repos.py rename to tests/test_application/test_repo_load_controller/test_adding_repos.py index 7f65b1e017..366874455e 100644 --- a/tests/test_applicaton/test_repo_load_controller/test_adding_repos.py +++ b/tests/test_application/test_repo_load_controller/test_adding_repos.py @@ -1,7 +1,7 @@ import pytest import logging -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.util.repo_load_controller import RepoLoadController, DEFAULT_REPO_GROUP_IDS, CLI_USER_ID diff --git a/tests/test_applicaton/test_repo_load_controller/test_helper_functions.py b/tests/test_application/test_repo_load_controller/test_helper_functions.py similarity index 99% rename from tests/test_applicaton/test_repo_load_controller/test_helper_functions.py rename to tests/test_application/test_repo_load_controller/test_helper_functions.py index ab9222a920..a9dbd65763 100644 --- a/tests/test_applicaton/test_repo_load_controller/test_helper_functions.py +++ b/tests/test_application/test_repo_load_controller/test_helper_functions.py @@ -5,7 +5,7 @@ from augur.application.db.session import DatabaseSession from augur.tasks.github.util.github_task_session import GithubTaskSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from 
tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import Repo, RepoGroup, UserRepo, UserGroup logger = logging.getLogger(__name__) diff --git a/tests/test_applicaton/test_repo_load_controller/util.py b/tests/test_application/test_repo_load_controller/util.py similarity index 100% rename from tests/test_applicaton/test_repo_load_controller/util.py rename to tests/test_application/test_repo_load_controller/util.py diff --git a/tests/test_tasks/test_task_utlities/test_key_auth/test_github_random_key_auth.py b/tests/test_tasks/test_task_utilities/test_key_auth/test_github_random_key_auth.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_key_auth/test_github_random_key_auth.py rename to tests/test_tasks/test_task_utilities/test_key_auth/test_github_random_key_auth.py diff --git a/tests/test_tasks/test_task_utlities/test_key_auth/test_random_key_auth.py b/tests/test_tasks/test_task_utilities/test_key_auth/test_random_key_auth.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_key_auth/test_random_key_auth.py rename to tests/test_tasks/test_task_utilities/test_key_auth/test_random_key_auth.py diff --git a/tests/test_tasks/test_task_utlities/test_key_handler/test_github_api_key_handler.py b/tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_key_handler/test_github_api_key_handler.py rename to tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py diff --git a/tests/test_tasks/test_task_utlities/test_paginators/test_github_paginator.py b/tests/test_tasks/test_task_utilities/test_paginators/test_github_paginator.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_paginators/test_github_paginator.py rename to tests/test_tasks/test_task_utilities/test_paginators/test_github_paginator.py diff --git a/tests/test_tasks/test_task_utlities/test_redis/test_redis_list.py b/tests/test_tasks/test_task_utilities/test_redis/test_redis_list.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_redis/test_redis_list.py rename to tests/test_tasks/test_task_utilities/test_redis/test_redis_list.py diff --git a/tests/test_tasks/test_task_utlities/test_util/test_worker_util.py b/tests/test_tasks/test_task_utilities/test_util/test_worker_util.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_util/test_worker_util.py rename to tests/test_tasks/test_task_utilities/test_util/test_worker_util.py diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py b/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py index c27ebf4ed3..0b3cda6621 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py @@ -1,5 +1,5 @@ #SPDX-License-Identifier: MIT -from tests.test_workers.worker_persistance.util_persistance import * +from tests.test_workers.worker_persistence.util_persistence import * import pandas as pd #from augur.cli import add_repos #from augur.cli import add_repo_groups diff --git a/tests/test_workers/test_set_up_fixtures.py b/tests/test_workers/test_set_up_fixtures.py index 3add1f83fb..4109bcda55 100644 --- a/tests/test_workers/test_set_up_fixtures.py +++ b/tests/test_workers/test_set_up_fixtures.py @@ -100,7 +100,7 @@ def 
database_connection(): # Define a dummy worker class that gets the methods we need without running super().__init__ -class DummyPersistance(Persistant): +class DummyPersistence(Persistent): def __init__(self, database_connection): self.db = database_connection self.logger = logging.getLogger() @@ -127,7 +127,7 @@ def __init__(self, database_connection, config={}): self.platform = "github" # first set up logging. - self._root_augur_dir = Persistant.ROOT_AUGUR_DIR + self._root_augur_dir = Persistent.ROOT_AUGUR_DIR self.augur_config = AugurConfig(self._root_augur_dir) # Get default logging settings @@ -146,7 +146,7 @@ def __init__(self, database_connection, config={}): self.tool_version = '\'1.0.1\'' self.data_source = '\'Worker test Data\'' - # This mirros the functionality of the definition found in worker_persistance to make + # This mirrors the functionality of the definition found in worker_persistence to make # github related function calls much much easier to test. def initialize_database_connections(self): DB_STR = 'postgresql://{}:{}@{}:{}/{}'.format( diff --git a/tests/test_workers/worker_persistance/bad_Data.json b/tests/test_workers/worker_persistence/bad_Data.json similarity index 100% rename from tests/test_workers/worker_persistance/bad_Data.json rename to tests/test_workers/worker_persistence/bad_Data.json diff --git a/tests/test_workers/worker_persistance/contributors.json b/tests/test_workers/worker_persistence/contributors.json similarity index 100% rename from tests/test_workers/worker_persistance/contributors.json rename to tests/test_workers/worker_persistence/contributors.json diff --git a/tests/test_workers/worker_persistance/contributors_un_enriched.json b/tests/test_workers/worker_persistence/contributors_un_enriched.json similarity index 100% rename from tests/test_workers/worker_persistance/contributors_un_enriched.json rename to tests/test_workers/worker_persistence/contributors_un_enriched.json diff --git a/tests/test_workers/worker_persistance/standard_enrich_cntrb_id_data.json b/tests/test_workers/worker_persistence/standard_enrich_cntrb_id_data.json similarity index 100% rename from tests/test_workers/worker_persistance/standard_enrich_cntrb_id_data.json rename to tests/test_workers/worker_persistence/standard_enrich_cntrb_id_data.json diff --git a/tests/test_workers/worker_persistance/test_enrich_cntrb_id.py b/tests/test_workers/worker_persistence/test_enrich_cntrb_id.py similarity index 97% rename from tests/test_workers/worker_persistance/test_enrich_cntrb_id.py rename to tests/test_workers/worker_persistence/test_enrich_cntrb_id.py index f7fd67bb5b..4dbcb4f91b 100644 --- a/tests/test_workers/worker_persistance/test_enrich_cntrb_id.py +++ b/tests/test_workers/worker_persistence/test_enrich_cntrb_id.py @@ -1,5 +1,5 @@ #SPDX-License-Identifier: MIT -from tests.test_workers.worker_persistance.util_persistance import * +from tests.test_workers.worker_persistence.util_persistence import * #WIP diff --git a/tests/test_workers/worker_persistance/test_enrich_data_primary_keys.py b/tests/test_workers/worker_persistence/test_enrich_data_primary_keys.py similarity index 94% rename from tests/test_workers/worker_persistance/test_enrich_data_primary_keys.py rename to tests/test_workers/worker_persistence/test_enrich_data_primary_keys.py index a13f5a639a..df7b98bca5 100644 --- a/tests/test_workers/worker_persistance/test_enrich_data_primary_keys.py +++ b/tests/test_workers/worker_persistence/test_enrich_data_primary_keys.py @@ -1,6 +1,6 @@ #SPDX-License-Identifier: MIT 
-from tests.test_workers.worker_persistance.util_persistance import * +from tests.test_workers.worker_persistence.util_persistence import * @@ -62,12 +62,12 @@ def test_enrich_data_primary_keys_standard_input(database_connection, sample_sou database_connection.execute(tableDict['contributors_table'].insert().values(cntrb)) #create class for enrichment - dummyPersistant = DummyPersistance(database_connection) + dummyPersistent = DummyPersistence(database_connection) gh_merge_fields = ['avatar_url'] augur_merge_fields = ['gh_avatar_url'] - dummyPersistant.enrich_data_primary_keys(sample_source_data_enriched, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) + dummyPersistent.enrich_data_primary_keys(sample_source_data_enriched, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) #now test each record to make sure that they have an avatar_url avatar_url_sql = s.sql.text(""" @@ -87,11 +87,11 @@ def test_enrich_data_primary_keys_bad_data(database_connection): augur_merge_fields = ['gh_avatar_url'] #create class for enrichment - dummyPersistant = DummyPersistance(database_connection) + dummyPersistent = DummyPersistence(database_connection) #Make sure that function rejects null data - assert dummyPersistant.enrich_data_primary_keys({}, "contributors_table", gh_merge_fields, augur_merge_fields) == {} - assert dummyPersistant.enrich_data_primary_keys(None, "contributors_table", gh_merge_fields, augur_merge_fields) == None + assert dummyPersistent.enrich_data_primary_keys({}, "contributors_table", gh_merge_fields, augur_merge_fields) == {} + assert dummyPersistent.enrich_data_primary_keys(None, "contributors_table", gh_merge_fields, augur_merge_fields) == None def test_enrich_data_primary_keys_redundant_enrich(database_connection,sample_source_data_enriched, sample_source_data_unenriched): @@ -151,12 +151,12 @@ def test_enrich_data_primary_keys_redundant_enrich(database_connection,sample_so database_connection.execute(tableDict['contributors_table'].insert().values(cntrb)) #create class for enrichment - dummyPersistant = DummyPersistance(database_connection) + dummyPersistent = DummyPersistence(database_connection) gh_merge_fields = ['avatar_url'] augur_merge_fields = ['gh_avatar_url'] - dummyPersistant.enrich_data_primary_keys(sample_source_data_enriched, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) + dummyPersistent.enrich_data_primary_keys(sample_source_data_enriched, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) #now test each record to make sure that they have an avatar_url avatar_url_sql = s.sql.text(""" @@ -229,11 +229,11 @@ def test_enrich_data_primary_keys_standard_input(database_connection, sample_sou database_connection.execute(tableDict['contributors_table'].insert().values(cntrb)) #create class for enrichment - dummyPersistant = DummyPersistance(database_connection) + dummyPersistent = DummyPersistence(database_connection) gh_merge_fields = ['avatar_url'] augur_merge_fields = ['gh_avatar_url'] - dummyPersistant.enrich_data_primary_keys(sample_source_data_bad_api_return, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) + dummyPersistent.enrich_data_primary_keys(sample_source_data_bad_api_return, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) return \ No newline at end of file diff --git a/tests/test_workers/worker_persistance/util_persistance.py b/tests/test_workers/worker_persistence/util_persistence.py similarity index 80% rename from 
tests/test_workers/worker_persistance/util_persistance.py rename to tests/test_workers/worker_persistence/util_persistence.py index 118d652b7e..e1a9a6e982 100644 --- a/tests/test_workers/worker_persistance/util_persistance.py +++ b/tests/test_workers/worker_persistence/util_persistence.py @@ -8,7 +8,7 @@ #Sample source data generation that pulls json data that has contributions listed @pytest.fixture def sample_source_data_enriched(): - jsonFile = open("tests/test_workers/worker_persistance/contributors.json") + jsonFile = open("tests/test_workers/worker_persistence/contributors.json") source_data = json.load(jsonFile) @@ -18,7 +18,7 @@ def sample_source_data_enriched(): #Sample source data generation that opens json data that doesn't have contributions listed @pytest.fixture def sample_source_data_unenriched(): - jsonFile = open("tests/test_workers/worker_persistance/contributors_un_enriched.json") + jsonFile = open("tests/test_workers/worker_persistence/contributors_un_enriched.json") source_data = json.load(jsonFile) @@ -28,7 +28,7 @@ def sample_source_data_unenriched(): #Bad data that an api might return @pytest.fixture def sample_source_data_bad_api_return(): - jsonFile = open("tests/test_workers/worker_persistance/bad_Data.json") + jsonFile = open("tests/test_workers/worker_persistence/bad_Data.json") source_data = json.load(jsonFile) @@ -39,7 +39,7 @@ def sample_source_data_bad_api_return(): #Sample data for comments api return @pytest.fixture def sample_source_data_standard_github_comments(): - jsonFile = open("tests/test_workers/worker_persistance/standard_enrich_cntrb_id_data.json") + jsonFile = open("tests/test_workers/worker_persistence/standard_enrich_cntrb_id_data.json") source_data = json.load(jsonFile) From 6702c35cfbbf88b03b300e66723a3e5a322f4a9c Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Wed, 12 Nov 2025 17:03:52 +0000 Subject: [PATCH 005/104] refactor: Remove payload index to match Augur conventions - All JSON/JSONB fields in Augur have NO indexes - Verified: repo_badging.data (JSONB), chaoss_metric_status.cm_info (JSON), etc. 
- payload is used for display, not filtering - Query performance relies on ix_tme_repo_ts and ix_tme_event indexes Signed-off-by: Xiaoha --- .../alembic/versions/36_add_topic_model_event.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/augur/application/schema/alembic/versions/36_add_topic_model_event.py b/augur/application/schema/alembic/versions/36_add_topic_model_event.py index 8edcd90609..a2cb78ffd3 100644 --- a/augur/application/schema/alembic/versions/36_add_topic_model_event.py +++ b/augur/application/schema/alembic/versions/36_add_topic_model_event.py @@ -46,13 +46,11 @@ def upgrade(): "ix_tme_repo_ts", "topic_model_event", ["repo_id", "ts"], schema="augur_data" ) op.create_index("ix_tme_event", "topic_model_event", ["event"], schema="augur_data") + # btree index on payload for exact match queries (following Augur conventions) + # Note: btree only supports equality comparison, not JSON containment queries op.create_index( - "ix_tme_payload", - "topic_model_event", - [sa.text("(payload)")], - unique=False, - schema="augur_data", - postgresql_using="gin", + "ix_tme_payload", "topic_model_event", ["payload"], + unique=False, schema="augur_data" ) From a96e62ff7ffebf4b74cd4987344807234e770ec4 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 12 Nov 2025 13:08:06 -0500 Subject: [PATCH 006/104] fix incorrect path for worker persistence in pytest configuration Signed-off-by: Shlok Gilda --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 529771dca0..801ac54574 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -180,7 +180,7 @@ legacy_tox_ini = """ metric-routes: python tests/test_routes/runner.py workers: pytest tests/test_workers/ classes: pytest tests/test_classes/ - worker-persistence: pytest test/test_workers/worker_persistence/ + worker-persistence: pytest tests/test_workers/worker_persistence/ [pytest] addopts = -ra -s From c64246264928be71fd90014dbb879d9963956c64 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 10 Nov 2025 17:09:00 -0500 Subject: [PATCH 007/104] Detect docker environments and ensure gunicorn error logs end up in dockers log stream Signed-off-by: Adrian Edwards --- augur/api/gunicorn_conf.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/augur/api/gunicorn_conf.py b/augur/api/gunicorn_conf.py index 4e97d7b465..dd1bfc6961 100644 --- a/augur/api/gunicorn_conf.py +++ b/augur/api/gunicorn_conf.py @@ -39,9 +39,15 @@ # set the log location for gunicorn logs_directory = get_value('Logging', 'logs_directory') + +is_docker = os.getenv("AUGUR_DOCKER_DEPLOY").lower() in ('true', '1', 't', 'y', 'yes') accesslog = f"{logs_directory}/gunicorn.log" errorlog = f"{logs_directory}/gunicorn.log" +# If deploying via docker, include gunicorn error logs in the docker log stream by sending it to stdout +if is_docker: + errorlog = '-' + ssl_bool = get_value('Server', 'ssl') if ssl_bool is True: From c952f662abe24c742e22b41f35a614d56f41f017 Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Wed, 12 Nov 2025 20:25:12 +0000 Subject: [PATCH 008/104] fix: Use timezone-aware timestamps for topic modeling schema - set training_start_time/end_time/data_collection_date to TIMESTAMPTZ - update TopicModelMeta ORM to use timezone-aware columns - align topic_model_event ts column with TIMESTAMPTZ requirement - satisfies maintainer request for timezone data storage Signed-off-by: Xiaoha --- augur/application/db/models/augur_data.py | 6 +++--- 
.../alembic/versions/35_create_topic_model_meta_table.py | 6 +++--- .../schema/alembic/versions/36_add_topic_model_event.py | 9 +-------- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 9751cfb5f4..ddf11e0532 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -3687,12 +3687,12 @@ class TopicModelMeta(Base): comment="JSON object containing visualization data for the model" ) training_start_time = Column( - TIMESTAMP(), + TIMESTAMP(timezone=True), nullable=False, comment="When training started" ) training_end_time = Column( - TIMESTAMP(), + TIMESTAMP(timezone=True), nullable=False, comment="When training ended" ) @@ -3700,7 +3700,7 @@ class TopicModelMeta(Base): tool_version = Column(String, comment="Standard Augur Metadata") data_source = Column(String, comment="Standard Augur Metadata") data_collection_date = Column( - TIMESTAMP(precision=0), + TIMESTAMP(timezone=True, precision=0), server_default=text("CURRENT_TIMESTAMP") ) diff --git a/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py index 9e4a00c3cc..b1235365ba 100644 --- a/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py +++ b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py @@ -49,9 +49,9 @@ def upgrade(): sa.Column('visualization_data', postgresql.JSONB(), nullable=True), # Timestamps (NOT NULL with defaults) - sa.Column('training_start_time', sa.TIMESTAMP(), nullable=False), - sa.Column('training_end_time', sa.TIMESTAMP(), nullable=False), - sa.Column('data_collection_date', sa.TIMESTAMP(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('training_start_time', postgresql.TIMESTAMP(timezone=True), nullable=False), + sa.Column('training_end_time', postgresql.TIMESTAMP(timezone=True), nullable=False), + sa.Column('data_collection_date', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), # Standard Augur metadata (NOT NULL) sa.Column('tool_source', sa.String(), nullable=False), diff --git a/augur/application/schema/alembic/versions/36_add_topic_model_event.py b/augur/application/schema/alembic/versions/36_add_topic_model_event.py index a2cb78ffd3..cfc7e5e813 100644 --- a/augur/application/schema/alembic/versions/36_add_topic_model_event.py +++ b/augur/application/schema/alembic/versions/36_add_topic_model_event.py @@ -22,7 +22,7 @@ def upgrade(): sa.Column("event_id", sa.BigInteger(), primary_key=True), sa.Column( "ts", - sa.TIMESTAMP(), + postgresql.TIMESTAMP(timezone=True), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False, ), @@ -46,16 +46,9 @@ def upgrade(): "ix_tme_repo_ts", "topic_model_event", ["repo_id", "ts"], schema="augur_data" ) op.create_index("ix_tme_event", "topic_model_event", ["event"], schema="augur_data") - # btree index on payload for exact match queries (following Augur conventions) - # Note: btree only supports equality comparison, not JSON containment queries - op.create_index( - "ix_tme_payload", "topic_model_event", ["payload"], - unique=False, schema="augur_data" - ) def downgrade(): - op.drop_index("ix_tme_payload", table_name="topic_model_event", schema="augur_data") op.drop_index("ix_tme_event", table_name="topic_model_event", schema="augur_data") op.drop_index("ix_tme_repo_ts", 
table_name="topic_model_event", schema="augur_data") op.drop_table("topic_model_event", schema="augur_data") From aa67f9b42b495fca3e08deb6a5b345208f080a42 Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Wed, 12 Nov 2025 23:42:34 +0000 Subject: [PATCH 009/104] chore: rely on SQLAlchemy TIMESTAMP type with timezone - switch Alembic migrations to use sa.TIMESTAMP(timezone=True) - keeps timezone support while avoiding Postgres-specific type import Signed-off-by: Xiaoha --- .../alembic/versions/35_create_topic_model_meta_table.py | 6 +++--- .../schema/alembic/versions/36_add_topic_model_event.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py index b1235365ba..042155556f 100644 --- a/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py +++ b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py @@ -49,9 +49,9 @@ def upgrade(): sa.Column('visualization_data', postgresql.JSONB(), nullable=True), # Timestamps (NOT NULL with defaults) - sa.Column('training_start_time', postgresql.TIMESTAMP(timezone=True), nullable=False), - sa.Column('training_end_time', postgresql.TIMESTAMP(timezone=True), nullable=False), - sa.Column('data_collection_date', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('training_start_time', sa.TIMESTAMP(timezone=True), nullable=False), + sa.Column('training_end_time', sa.TIMESTAMP(timezone=True), nullable=False), + sa.Column('data_collection_date', sa.TIMESTAMP(timezone=True), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), # Standard Augur metadata (NOT NULL) sa.Column('tool_source', sa.String(), nullable=False), diff --git a/augur/application/schema/alembic/versions/36_add_topic_model_event.py b/augur/application/schema/alembic/versions/36_add_topic_model_event.py index cfc7e5e813..5359e8f10d 100644 --- a/augur/application/schema/alembic/versions/36_add_topic_model_event.py +++ b/augur/application/schema/alembic/versions/36_add_topic_model_event.py @@ -22,7 +22,7 @@ def upgrade(): sa.Column("event_id", sa.BigInteger(), primary_key=True), sa.Column( "ts", - postgresql.TIMESTAMP(timezone=True), + sa.TIMESTAMP(timezone=True), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False, ), From 46e5b69fa40396fd4258acaff7256b3a83d52f8a Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Tue, 4 Nov 2025 21:56:20 -0500 Subject: [PATCH 010/104] add flexible column order support for CSV imports - add csv_utils.py with intelligent header detection - refactor add-repos and add-repo-groups commands to use new CSV parser - support both header and headerless CSV formats - add automatic column detection for headerless CSVs - add 10MB file size limit with clear error message - update sample CSV files to include headers Fixes #3310 Signed-off-by: Shlok Gilda --- augur/application/cli/csv_utils.py | 272 ++++++++++++++++++ augur/application/cli/db.py | 168 ++++++++--- .../schema/repo_group_load_sample.csv | 1 + augur/application/schema/repo_load_sample.csv | 1 + .../test_repo_groups.csv | 1 + .../test_repos.csv | 1 + 6 files changed, 399 insertions(+), 45 deletions(-) create mode 100644 augur/application/cli/csv_utils.py diff --git a/augur/application/cli/csv_utils.py b/augur/application/cli/csv_utils.py new file mode 100644 index 0000000000..2c0a2fcff3 --- /dev/null +++ 
b/augur/application/cli/csv_utils.py @@ -0,0 +1,272 @@ +# SPDX-License-Identifier: MIT +""" +CSV processing utilities for Augur CLI +""" +import csv +import logging +import os +from typing import Dict, List, Tuple + +logger = logging.getLogger(__name__) + +# Constants +MAX_FILE_SIZE_MB = 10 +MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024 + + +class CSVProcessingError(Exception): + """Raised when CSV processing fails.""" + + pass + + +def check_file_size(filename: str) -> None: + """Validate file size is under limit""" + size = os.path.getsize(filename) + if size > MAX_FILE_SIZE_BYTES: + size_mb = size / (1024 * 1024) + raise CSVProcessingError( + f"File size ({size_mb:.1f}MB) exceeds {MAX_FILE_SIZE_MB}MB limit. " + f"Consider splitting into smaller batches." + ) + + +def detect_headers(first_row: List[str], expected_columns: set) -> bool: + """Detect if first row contains column headers""" + normalized = {col.strip().lower() for col in first_row} + return expected_columns.issubset(normalized) + + +def detect_column_mapping_repos(rows: List[List[str]]) -> Dict[str, int]: + """Detect which column contains URLs vs IDs for headerless repo CSVs""" + from augur.application.db.models import Repo + + if not rows or len(rows[0]) != 2: + raise CSVProcessingError( + "Expected 2 columns (repo_url, repo_group_id). " + f"Found {len(rows[0]) if rows else 0} columns." + ) + + # Sample first 10 rows to determine column types + sample_size = min(10, len(rows)) + sample_rows = rows[:sample_size] + + # Test each column to see if it contains URLs + for col_idx in [0, 1]: + col_values = [row[col_idx] for row in sample_rows] + + # Count how many values in this column parse as valid git URLs + url_matches = 0 + for value in col_values: + value = value.strip() + github_parse = Repo.parse_github_repo_url(value) + gitlab_parse = Repo.parse_gitlab_repo_url(value) + + if github_parse != (None, None) or gitlab_parse != (None, None): + url_matches += 1 + + # If >80% of values are valid URLs, this is the URL column + match_rate = url_matches / len(col_values) + if match_rate >= 0.8: + url_col = col_idx + id_col = 1 - col_idx # The other column + return {"repo_url": url_col, "repo_group_id": id_col} + + raise CSVProcessingError( + "Could not detect column types. Ensure CSV contains valid git repository URLs. " + "Or add headers: repo_url,repo_group_id" + ) + + +def detect_column_mapping_repo_groups(rows: List[List[str]]) -> Dict[str, int]: + """Detect which column contains IDs vs names for headerless repo group CSVs""" + if not rows or len(rows[0]) != 2: + raise CSVProcessingError( + "Expected 2 columns (repo_group_id, repo_group_name). " + f"Found {len(rows[0]) if rows else 0} columns." + ) + + # Sample first 10 rows + sample_size = min(10, len(rows)) + sample_rows = rows[:sample_size] + + # Test each column to see if it contains integers + for col_idx in [0, 1]: + col_values = [row[col_idx] for row in sample_rows] + + # Count how many values are positive integers + int_matches = 0 + for value in col_values: + try: + if int(value.strip()) > 0: + int_matches += 1 + except (ValueError, AttributeError): + pass + + # If >80% of values are integers, this is the ID column + match_rate = int_matches / len(col_values) + if match_rate >= 0.8: + id_col = col_idx + name_col = 1 - col_idx # The other column + return {"repo_group_id": id_col, "repo_group_name": name_col} + + raise CSVProcessingError( + "Could not detect column types. Ensure CSV has valid format. 
" + "Or add headers: repo_group_id,repo_group_name" + ) + + +def process_repo_csv(filename: str) -> List[Dict[str, str]]: + """Process repository CSV file with intelligent header detection""" + check_file_size(filename) + + rows = [] + + with open(filename, "r", newline="") as f: + # Read first line to detect headers + first_line = f.readline() + f.seek(0) + + first_row = next(csv.reader([first_line])) + has_headers = detect_headers(first_row, {"repo_url", "repo_group_id"}) + + if has_headers: + logger.info("CSV has headers, using DictReader") + reader = csv.DictReader(f) + + # Normalize fieldnames + reader.fieldnames = [fn.strip().lower() for fn in reader.fieldnames] + + # Validate required columns present + required = {"repo_url", "repo_group_id"} + if not required.issubset(set(reader.fieldnames)): + missing = required - set(reader.fieldnames) + raise CSVProcessingError( + f"Missing required columns: {missing}. " + f"Expected: repo_url, repo_group_id" + ) + + for line_num, row in enumerate(reader, start=2): + row_normalized = {k.strip().lower(): v.strip() for k, v in row.items()} + rows.append(row_normalized) + + else: + logger.info("CSV has no headers, using intelligent column detection") + # Read all rows + all_rows = list(csv.reader(f)) + + if not all_rows: + raise CSVProcessingError("CSV file is empty") + + # Detect which column is which + col_mapping = detect_column_mapping_repos(all_rows) + + # Convert to dicts + for line_num, row in enumerate(all_rows, start=1): + if len(row) != 2: + logger.warning( + f"Line {line_num}: Expected 2 columns, got {len(row)}, skipping" + ) + continue + + row_dict = { + "repo_url": row[col_mapping["repo_url"]].strip(), + "repo_group_id": row[col_mapping["repo_group_id"]].strip(), + } + rows.append(row_dict) + + logger.info(f"Parsed {len(rows)} rows from CSV") + return rows + + +def process_repo_group_csv(filename: str) -> List[Dict[str, str]]: + """Process repository group CSV file with intelligent header detection""" + check_file_size(filename) + + rows = [] + + with open(filename, "r", newline="") as f: + # Read first line to detect headers + first_line = f.readline() + f.seek(0) + + first_row = next(csv.reader([first_line])) + has_headers = detect_headers(first_row, {"repo_group_id", "repo_group_name"}) + + if has_headers: + logger.info("CSV has headers, using DictReader") + reader = csv.DictReader(f) + + # Normalize fieldnames + reader.fieldnames = [fn.strip().lower() for fn in reader.fieldnames] + + # Validate required columns present + required = {"repo_group_id", "repo_group_name"} + if not required.issubset(set(reader.fieldnames)): + missing = required - set(reader.fieldnames) + raise CSVProcessingError( + f"Missing required columns: {missing}. 
" + f"Expected: repo_group_id, repo_group_name" + ) + + for line_num, row in enumerate(reader, start=2): + row_normalized = {k.strip().lower(): v.strip() for k, v in row.items()} + + # Skip empty rows + if not row_normalized.get("repo_group_id") or not row_normalized.get( + "repo_group_name" + ): + continue + + rows.append(row_normalized) + + else: + logger.info("CSV has no headers, using intelligent column detection") + # Read all rows + all_rows = list(csv.reader(f)) + + if not all_rows: + raise CSVProcessingError("CSV file is empty") + + # Detect which column is which + col_mapping = detect_column_mapping_repo_groups(all_rows) + + # Convert to dicts + for line_num, row in enumerate(all_rows, start=1): + if len(row) != 2: + logger.warning( + f"Line {line_num}: Expected 2 columns, got {len(row)}, skipping" + ) + continue + + # Skip empty rows + if not row[0].strip() or not row[1].strip(): + continue + + row_dict = { + "repo_group_id": row[col_mapping["repo_group_id"]].strip(), + "repo_group_name": row[col_mapping["repo_group_name"]].strip(), + } + rows.append(row_dict) + + logger.info(f"Parsed {len(rows)} rows from CSV") + return rows + + +def write_rejection_file(filename: str, rejections: List[Tuple[Dict, str]]) -> str: + """Write rejected rows to a .rejected.csv file""" + if not rejections: + return None + + rejection_file = f"{filename}.rejected.csv" + + with open(rejection_file, "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["original_data", "rejection_reason"]) + + for row_dict, reason in rejections: + original_data = ",".join(str(v) for v in row_dict.values()) + writer.writerow([original_data, reason]) + + logger.info(f"Wrote {len(rejections)} rejections to {rejection_file}") + return rejection_file diff --git a/augur/application/cli/db.py b/augur/application/cli/db.py index c20fcf0b2e..20fec42412 100644 --- a/augur/application/cli/db.py +++ b/augur/application/cli/db.py @@ -1,12 +1,11 @@ # SPDX-License-Identifier: MIT import os -from os import environ, chmod, path, getenv, stat +from os import environ, chmod, path, getenv import logging from sys import exit from subprocess import call import random import string -import csv import click import sqlalchemy as s import pandas as pd @@ -25,6 +24,12 @@ from sqlalchemy import update from datetime import datetime from augur.application.db.models import Repo +from augur.application.cli.csv_utils import ( + process_repo_csv, + process_repo_group_csv, + write_rejection_file, + CSVProcessingError, +) logger = logging.getLogger(__name__) @@ -42,9 +47,14 @@ def cli(ctx): @with_database @click.pass_context def add_repos(ctx, filename): - """Add repositories to Augur's database. + """Add repositories to Augur's database from a CSV file. + + The CSV file can have headers (recommended): + repo_url,repo_group_id + https://github.com/chaoss/augur.git,10 - The .csv file format should be repo_url,group_id + Or no headers (backward compatible - column order will be auto-detected): + https://github.com/chaoss/augur.git,10 NOTE: The Group ID must already exist in the REPO_Groups Table. 
@@ -55,30 +65,59 @@ def add_repos(ctx, filename): with GithubTaskSession(logger, engine=ctx.obj.engine) as session: controller = RepoLoadController(session) - line_total = len(open(filename).readlines()) - with open(filename) as upload_repos_file: - data = csv.reader(upload_repos_file, delimiter=",") - for line_num, row in enumerate(data): - repo_data = {} - repo_data["url"] = row[0] + try: + # Parse CSV (handles headers and column detection) + rows = process_repo_csv(filename) + + if not rows: + logger.error("No valid rows found in CSV file") + return + + logger.info(f"Processing {len(rows)} repositories...") + + # Process each row using EXISTING logic + successful = 0 + rejections = [] + + for row in rows: try: - repo_data["repo_group_id"] = int(row[1]) - except ValueError: - print( - f"Invalid repo group_id: {row[1]} for Git url: `{repo_data['url']}`" - ) + repo_data = { + "url": row["repo_url"], + "repo_group_id": int(row["repo_group_id"]), + } + except (ValueError, KeyError) as e: + logger.warning(f"Invalid data format: {row}, error: {e}") + rejections.append((row, f"Invalid format: {e}")) continue print( - f"Inserting repo {line_num}/{line_total} with Git URL `{repo_data['url']}` into repo group {repo_data['repo_group_id']}" + f"Inserting repo with Git URL `{repo_data['url']}` into repo group {repo_data['repo_group_id']}" ) succeeded, message = controller.add_cli_repo(repo_data) - if not succeeded: - logger.error(f"insert repo failed with error: {message['status']}`") - else: + if succeeded: + successful += 1 logger.info(f"Repo added: {repo_data}") print("Success") + else: + logger.error(f"insert repo failed with error: {message['status']}") + rejections.append((row, f"Failed to add repo: {message['status']}")) + + logger.info(f"Successfully added {successful} repositories") + + if rejections: + rejection_file = write_rejection_file(filename, rejections) + logger.warning( + f"{len(rejections)} repositories failed. " + f"See {rejection_file} for details." 
+ ) + + except CSVProcessingError as e: + logger.error(f"CSV processing error: {e}") + return + except Exception as e: + logger.error(f"Unexpected error: {e}") + raise @cli.command("get-repo-groups") @@ -113,40 +152,79 @@ def add_repo_groups(ctx, filename): """ Create new repo groups in Augur's database """ - with ctx.obj.engine.begin() as connection: - df = pd.read_sql( - s.sql.text("SELECT repo_group_id FROM augur_data.repo_groups"), - connection, - ) - repo_group_IDs = df["repo_group_id"].values.tolist() - - insert_repo_group_sql = s.sql.text( + try: + # Parse CSV (handles headers and column detection) + rows = process_repo_group_csv(filename) + + if not rows: + logger.error("No valid rows found in CSV file") + return + + logger.info(f"Processing {len(rows)} repository groups...") + + with ctx.obj.engine.begin() as connection: + # Get existing repo group IDs + df = pd.read_sql( + s.sql.text("SELECT repo_group_id FROM augur_data.repo_groups"), + connection, + ) + repo_group_IDs = df["repo_group_id"].values.tolist() + + insert_repo_group_sql = s.sql.text( + """ + INSERT INTO "augur_data"."repo_groups"("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_id, :repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP); """ - INSERT INTO "augur_data"."repo_groups"("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_id, :repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP); - """ - ) + ) + + # Process each row + successful = 0 + rejections = [] + + for row in rows: + try: + group_id = int(row["repo_group_id"]) + group_name = row["repo_group_name"] + except (ValueError, KeyError) as e: + logger.warning(f"Invalid data format: {row}, error: {e}") + rejections.append((row, f"Invalid format: {e}")) + continue - with open(filename) as create_repo_groups_file: - data = csv.reader(create_repo_groups_file, delimiter=",") - for row in data: - # Handle case where there's a hanging empty row. - if not row: - logger.info("Skipping empty data...") + # Check if already exists + if group_id in repo_group_IDs: + logger.info(f"Repo group {group_id} already exists, skipping") continue - logger.info(f"Inserting repo group with values {row}...") - if int(row[0]) not in repo_group_IDs: - repo_group_IDs.append(int(row[0])) + try: + logger.info( + f"Inserting repo group: ID={group_id}, Name={group_name}" + ) connection.execute( insert_repo_group_sql.bindparams( - repo_group_id=int(row[0]), - repo_group_name=row[1], + repo_group_id=group_id, + repo_group_name=group_name, ) ) - else: - logger.info( - f"Repo group with ID {row[1]} for repo group {row[1]} already exists, skipping..." - ) + successful += 1 + repo_group_IDs.append(group_id) + except Exception as e: + logger.error(f"Failed to insert repo group {group_id}: {e}") + rejections.append((row, f"Database error: {e}")) + + logger.info(f"Successfully added {successful} repository groups") + + if rejections: + rejection_file = write_rejection_file(filename, rejections) + logger.warning( + f"{len(rejections)} groups failed. " + f"See {rejection_file} for details." 
+ ) + + except CSVProcessingError as e: + logger.error(f"CSV processing error: {e}") + return + except Exception as e: + logger.error(f"Unexpected error: {e}") + raise @cli.command("add-github-org") diff --git a/augur/application/schema/repo_group_load_sample.csv b/augur/application/schema/repo_group_load_sample.csv index ab27193e2a..8b5ada1413 100644 --- a/augur/application/schema/repo_group_load_sample.csv +++ b/augur/application/schema/repo_group_load_sample.csv @@ -1,2 +1,3 @@ +repo_group_id,repo_group_name 10,Repo Group 1 20,Repo Group 2 \ No newline at end of file diff --git a/augur/application/schema/repo_load_sample.csv b/augur/application/schema/repo_load_sample.csv index fb537d4949..ee11bb5ad5 100644 --- a/augur/application/schema/repo_load_sample.csv +++ b/augur/application/schema/repo_load_sample.csv @@ -1,3 +1,4 @@ +repo_url,repo_group_id https://github.com/chaoss/augur.git,10 https://github.com/chaoss/grimoirelab.git,10 https://github.com/chaoss/wg-evolution.git,20 diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv index 8001523560..463da48bde 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv @@ -1,2 +1,3 @@ +repo_group_id,repo_group_name 10,Repo Group 1 20,Repo Group 2 diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv index fb537d4949..ee11bb5ad5 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv @@ -1,3 +1,4 @@ +repo_url,repo_group_id https://github.com/chaoss/augur.git,10 https://github.com/chaoss/grimoirelab.git,10 https://github.com/chaoss/wg-evolution.git,20 From b2d061437cfbad68441496fdb061ba3c4f369c4e Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 5 Nov 2025 14:12:24 -0500 Subject: [PATCH 011/104] remove header rows from test CSV files for repo groups and repos Signed-off-by: Shlok Gilda --- .../test_facade_contributor_interface/test_repo_groups.csv | 1 - .../test_facade/test_facade_contributor_interface/test_repos.csv | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv index 463da48bde..8001523560 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv @@ -1,3 +1,2 @@ -repo_group_id,repo_group_name 10,Repo Group 1 20,Repo Group 2 diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv index ee11bb5ad5..fb537d4949 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv @@ -1,4 +1,3 @@ -repo_url,repo_group_id https://github.com/chaoss/augur.git,10 https://github.com/chaoss/grimoirelab.git,10 https://github.com/chaoss/wg-evolution.git,20 From 515adc1bf5d8efd884d4b2d9accf3eee25d52bbf Mon Sep 17 00:00:00 
2001 From: Shlok Gilda Date: Wed, 5 Nov 2025 15:24:58 -0500 Subject: [PATCH 012/104] improve CSV processing error handling and logging in db commands Signed-off-by: Shlok Gilda --- augur/application/cli/csv_utils.py | 333 +++++++++++------------------ augur/application/cli/db.py | 26 +-- 2 files changed, 140 insertions(+), 219 deletions(-) diff --git a/augur/application/cli/csv_utils.py b/augur/application/cli/csv_utils.py index 2c0a2fcff3..af7821c35a 100644 --- a/augur/application/cli/csv_utils.py +++ b/augur/application/cli/csv_utils.py @@ -5,268 +5,195 @@ import csv import logging import os -from typing import Dict, List, Tuple logger = logging.getLogger(__name__) -# Constants MAX_FILE_SIZE_MB = 10 MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024 -class CSVProcessingError(Exception): - """Raised when CSV processing fails.""" +def validate_git_url(value: str) -> bool: + """Validate if value is a valid git repository URL""" - pass + from augur.application.db.models import Repo + + value = value.strip() + github_parse = Repo.parse_github_repo_url(value) + gitlab_parse = Repo.parse_gitlab_repo_url(value) + return github_parse != (None, None) or gitlab_parse != (None, None) -def check_file_size(filename: str) -> None: - """Validate file size is under limit""" - size = os.path.getsize(filename) - if size > MAX_FILE_SIZE_BYTES: - size_mb = size / (1024 * 1024) - raise CSVProcessingError( - f"File size ({size_mb:.1f}MB) exceeds {MAX_FILE_SIZE_MB}MB limit. " - f"Consider splitting into smaller batches." - ) +def validate_positive_int(value: str) -> bool: + """Validate if value is a positive integer""" + try: + return int(value.strip()) > 0 + except (ValueError, AttributeError): + return False -def detect_headers(first_row: List[str], expected_columns: set) -> bool: - """Detect if first row contains column headers""" - normalized = {col.strip().lower() for col in first_row} - return expected_columns.issubset(normalized) +def detect_column_order(sample_rows: list, validators: dict) -> dict: + """Detect column order by testing validators against sample data.""" -def detect_column_mapping_repos(rows: List[List[str]]) -> Dict[str, int]: - """Detect which column contains URLs vs IDs for headerless repo CSVs""" - from augur.application.db.models import Repo - - if not rows or len(rows[0]) != 2: - raise CSVProcessingError( - "Expected 2 columns (repo_url, repo_group_id). " - f"Found {len(rows[0]) if rows else 0} columns." + if not sample_rows or len(sample_rows[0]) != len(validators): + raise ValueError( + f"Expected {len(validators)} columns. " + f"Found {len(sample_rows[0]) if sample_rows else 0} columns." ) # Sample first 10 rows to determine column types - sample_size = min(10, len(rows)) - sample_rows = rows[:sample_size] - - # Test each column to see if it contains URLs - for col_idx in [0, 1]: - col_values = [row[col_idx] for row in sample_rows] - - # Count how many values in this column parse as valid git URLs - url_matches = 0 - for value in col_values: - value = value.strip() - github_parse = Repo.parse_github_repo_url(value) - gitlab_parse = Repo.parse_gitlab_repo_url(value) - - if github_parse != (None, None) or gitlab_parse != (None, None): - url_matches += 1 - - # If >80% of values are valid URLs, this is the URL column - match_rate = url_matches / len(col_values) - if match_rate >= 0.8: - url_col = col_idx - id_col = 1 - col_idx # The other column - return {"repo_url": url_col, "repo_group_id": id_col} - - raise CSVProcessingError( - "Could not detect column types. 
Ensure CSV contains valid git repository URLs. " - "Or add headers: repo_url,repo_group_id" - ) - + sample_size = min(10, len(sample_rows)) + sample_data = sample_rows[:sample_size] + + # Try to match each validator to a column using 80% threshold + column_mapping = {} + used_indices = set() + + for col_name, validator in validators.items(): + best_match_idx = None + + # Test each column + for col_idx in range(len(sample_data[0])): + if col_idx in used_indices: + continue + + # Count how many values in this column pass validation + matches = 0 + for row in sample_data: + if col_idx < len(row) and validator(row[col_idx]): + matches += 1 + + # If >80% of values pass validation, this is the correct column + match_rate = matches / len(sample_data) + if match_rate >= 0.8: + best_match_idx = col_idx + break + + if best_match_idx is not None: + column_mapping[col_name] = best_match_idx + used_indices.add(best_match_idx) + else: + # No match found for this column + raise ValueError( + f"Could not detect column '{col_name}'. " + f"Ensure CSV has valid format or add headers: {', '.join(validators.keys())}" + ) -def detect_column_mapping_repo_groups(rows: List[List[str]]) -> Dict[str, int]: - """Detect which column contains IDs vs names for headerless repo group CSVs""" - if not rows or len(rows[0]) != 2: - raise CSVProcessingError( - "Expected 2 columns (repo_group_id, repo_group_name). " - f"Found {len(rows[0]) if rows else 0} columns." - ) + return column_mapping - # Sample first 10 rows - sample_size = min(10, len(rows)) - sample_rows = rows[:sample_size] - - # Test each column to see if it contains integers - for col_idx in [0, 1]: - col_values = [row[col_idx] for row in sample_rows] - - # Count how many values are positive integers - int_matches = 0 - for value in col_values: - try: - if int(value.strip()) > 0: - int_matches += 1 - except (ValueError, AttributeError): - pass - - # If >80% of values are integers, this is the ID column - match_rate = int_matches / len(col_values) - if match_rate >= 0.8: - id_col = col_idx - name_col = 1 - col_idx # The other column - return {"repo_group_id": id_col, "repo_group_name": name_col} - - raise CSVProcessingError( - "Could not detect column types. Ensure CSV has valid format. " - "Or add headers: repo_group_id,repo_group_name" - ) +def process_csv(filename: str, expected_columns: dict) -> list: + """ + Generic CSV processor with header detection. -def process_repo_csv(filename: str) -> List[Dict[str, str]]: - """Process repository CSV file with intelligent header detection""" - check_file_size(filename) + Uses DictReader for both header and headerless CSVs by detecting column order + and reassigning fieldnames when necessary. + """ + + # Validate file size + size = os.path.getsize(filename) + if size > MAX_FILE_SIZE_BYTES: + size_mb = size / (1024 * 1024) + raise ValueError( + f"File size ({size_mb:.1f}MB) exceeds {MAX_FILE_SIZE_MB}MB limit. " + f"Consider splitting into smaller batches." 
+ ) rows = [] with open(filename, "r", newline="") as f: - # Read first line to detect headers - first_line = f.readline() - f.seek(0) - - first_row = next(csv.reader([first_line])) - has_headers = detect_headers(first_row, {"repo_url", "repo_group_id"}) - - if has_headers: - logger.info("CSV has headers, using DictReader") - reader = csv.DictReader(f) - - # Normalize fieldnames - reader.fieldnames = [fn.strip().lower() for fn in reader.fieldnames] - - # Validate required columns present - required = {"repo_url", "repo_group_id"} - if not required.issubset(set(reader.fieldnames)): - missing = required - set(reader.fieldnames) - raise CSVProcessingError( - f"Missing required columns: {missing}. " - f"Expected: repo_url, repo_group_id" - ) - - for line_num, row in enumerate(reader, start=2): - row_normalized = {k.strip().lower(): v.strip() for k, v in row.items()} - rows.append(row_normalized) + # Create DictReader - it will auto-read first row as fieldnames + reader = csv.DictReader(f) - else: - logger.info("CSV has no headers, using intelligent column detection") - # Read all rows - all_rows = list(csv.reader(f)) + # Check if auto-detected fieldnames are actual headers or data + detected_fieldnames = reader.fieldnames + if detected_fieldnames is None: + raise ValueError("CSV file is empty") - if not all_rows: - raise CSVProcessingError("CSV file is empty") + # Normalize and check if they match expected columns + normalized_fieldnames = {fn.strip().lower() for fn in detected_fieldnames} + expected_column_names = set(expected_columns.keys()) - # Detect which column is which - col_mapping = detect_column_mapping_repos(all_rows) - - # Convert to dicts - for line_num, row in enumerate(all_rows, start=1): - if len(row) != 2: - logger.warning( - f"Line {line_num}: Expected 2 columns, got {len(row)}, skipping" - ) - continue - - row_dict = { - "repo_url": row[col_mapping["repo_url"]].strip(), - "repo_group_id": row[col_mapping["repo_group_id"]].strip(), - } - rows.append(row_dict) - - logger.info(f"Parsed {len(rows)} rows from CSV") - return rows - - -def process_repo_group_csv(filename: str) -> List[Dict[str, str]]: - """Process repository group CSV file with intelligent header detection""" - check_file_size(filename) - - rows = [] - - with open(filename, "r", newline="") as f: - # Read first line to detect headers - first_line = f.readline() - f.seek(0) - - first_row = next(csv.reader([first_line])) - has_headers = detect_headers(first_row, {"repo_group_id", "repo_group_name"}) + has_headers = expected_column_names.issubset(normalized_fieldnames) if has_headers: + # Headers exist - proceed normally with DictReader logger.info("CSV has headers, using DictReader") - reader = csv.DictReader(f) - # Normalize fieldnames + # Normalize fieldnames for consistent access reader.fieldnames = [fn.strip().lower() for fn in reader.fieldnames] # Validate required columns present - required = {"repo_group_id", "repo_group_name"} - if not required.issubset(set(reader.fieldnames)): - missing = required - set(reader.fieldnames) - raise CSVProcessingError( + if not expected_column_names.issubset(set(reader.fieldnames)): + missing = expected_column_names - set(reader.fieldnames) + raise ValueError( f"Missing required columns: {missing}. 
" - f"Expected: repo_group_id, repo_group_name" + f"Expected: {', '.join(expected_column_names)}" ) - for line_num, row in enumerate(reader, start=2): + # Process all rows + for row in reader: row_normalized = {k.strip().lower(): v.strip() for k, v in row.items()} - - # Skip empty rows - if not row_normalized.get("repo_group_id") or not row_normalized.get( - "repo_group_name" - ): - continue - rows.append(row_normalized) else: + # No headers - detected_fieldnames are actually data logger.info("CSV has no headers, using intelligent column detection") - # Read all rows + + # We need to: + # 1. Read more rows to sample for column detection + # 2. Detect column order + # 3. Process first row (which is in detected_fieldnames) manually + # 4. Continue with remaining rows + + # Seek back to start and read all rows as raw data + f.seek(0) all_rows = list(csv.reader(f)) if not all_rows: - raise CSVProcessingError("CSV file is empty") + raise ValueError("CSV file is empty") - # Detect which column is which - col_mapping = detect_column_mapping_repo_groups(all_rows) + # Detect column order using sample rows + col_mapping = detect_column_order(all_rows, expected_columns) - # Convert to dicts - for line_num, row in enumerate(all_rows, start=1): - if len(row) != 2: + # Process all rows with detected column order + for row in all_rows: + if len(row) != len(expected_columns): logger.warning( - f"Line {line_num}: Expected 2 columns, got {len(row)}, skipping" + f"Expected {len(expected_columns)} columns, got {len(row)}, skipping" ) continue - # Skip empty rows - if not row[0].strip() or not row[1].strip(): - continue + # Build dict using detected column mapping + row_dict = {} + for col_name, col_idx in col_mapping.items(): + row_dict[col_name] = row[col_idx].strip() - row_dict = { - "repo_group_id": row[col_mapping["repo_group_id"]].strip(), - "repo_group_name": row[col_mapping["repo_group_name"]].strip(), - } rows.append(row_dict) logger.info(f"Parsed {len(rows)} rows from CSV") return rows -def write_rejection_file(filename: str, rejections: List[Tuple[Dict, str]]) -> str: - """Write rejected rows to a .rejected.csv file""" - if not rejections: - return None - - rejection_file = f"{filename}.rejected.csv" +def process_repo_csv(filename: str) -> list: + """Process repository CSV file with intelligent header detection""" - with open(rejection_file, "w", newline="") as f: - writer = csv.writer(f) - writer.writerow(["original_data", "rejection_reason"]) + return process_csv( + filename, + expected_columns={ + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + }, + ) - for row_dict, reason in rejections: - original_data = ",".join(str(v) for v in row_dict.values()) - writer.writerow([original_data, reason]) - logger.info(f"Wrote {len(rejections)} rejections to {rejection_file}") - return rejection_file +def process_repo_group_csv(filename: str) -> list: + """Process repository group CSV file with intelligent header detection""" + + return process_csv( + filename, + expected_columns={ + "repo_group_id": validate_positive_int, + "repo_group_name": lambda v: bool(v.strip()), + }, + ) diff --git a/augur/application/cli/db.py b/augur/application/cli/db.py index 20fec42412..e0df763dab 100644 --- a/augur/application/cli/db.py +++ b/augur/application/cli/db.py @@ -27,8 +27,6 @@ from augur.application.cli.csv_utils import ( process_repo_csv, process_repo_group_csv, - write_rejection_file, - CSVProcessingError, ) logger = logging.getLogger(__name__) @@ -79,7 +77,7 @@ def add_repos(ctx, filename): 
successful = 0 rejections = [] - for row in rows: + for idx, row in enumerate(rows, start=1): try: repo_data = { "url": row["repo_url"], @@ -91,7 +89,7 @@ def add_repos(ctx, filename): continue print( - f"Inserting repo with Git URL `{repo_data['url']}` into repo group {repo_data['repo_group_id']}" + f"Inserting repo {idx}/{len(rows)} with Git URL `{repo_data['url']}` into repo group {repo_data['repo_group_id']}" ) succeeded, message = controller.add_cli_repo(repo_data) @@ -106,13 +104,11 @@ def add_repos(ctx, filename): logger.info(f"Successfully added {successful} repositories") if rejections: - rejection_file = write_rejection_file(filename, rejections) - logger.warning( - f"{len(rejections)} repositories failed. " - f"See {rejection_file} for details." - ) + logger.warning(f"{len(rejections)} repositories failed:") + for row_data, reason in rejections: + logger.warning(f" - {row_data}: {reason}") - except CSVProcessingError as e: + except ValueError as e: logger.error(f"CSV processing error: {e}") return except Exception as e: @@ -213,13 +209,11 @@ def add_repo_groups(ctx, filename): logger.info(f"Successfully added {successful} repository groups") if rejections: - rejection_file = write_rejection_file(filename, rejections) - logger.warning( - f"{len(rejections)} groups failed. " - f"See {rejection_file} for details." - ) + logger.warning(f"{len(rejections)} repository groups failed:") + for row_data, reason in rejections: + logger.warning(f" - {row_data}: {reason}") - except CSVProcessingError as e: + except ValueError as e: logger.error(f"CSV processing error: {e}") return except Exception as e: From b42333f62591726c7533b933676ba8a7c329c749 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Sat, 8 Nov 2025 14:28:03 -0500 Subject: [PATCH 013/104] enhance type annotations and docstrings for CSV processing functions in cli Signed-off-by: Shlok Gilda --- augur/application/cli/csv_utils.py | 105 ++++++++++++++++++++++++----- augur/application/cli/db.py | 30 +++++++-- 2 files changed, 112 insertions(+), 23 deletions(-) diff --git a/augur/application/cli/csv_utils.py b/augur/application/cli/csv_utils.py index af7821c35a..e55835f6e5 100644 --- a/augur/application/cli/csv_utils.py +++ b/augur/application/cli/csv_utils.py @@ -5,6 +5,7 @@ import csv import logging import os +from typing import Callable logger = logging.getLogger(__name__) @@ -13,10 +14,19 @@ def validate_git_url(value: str) -> bool: - """Validate if value is a valid git repository URL""" + """Validate if value is a valid git repository URL. + Checks if the provided string is a valid GitHub or GitLab repository URL + using the Repo model's URL parsing methods. + + Args: + value: String to validate as a git repository URL + + Returns: + True if the value is a valid GitHub or GitLab URL, False otherwise + """ from augur.application.db.models import Repo - + value = value.strip() github_parse = Repo.parse_github_repo_url(value) gitlab_parse = Repo.parse_gitlab_repo_url(value) @@ -24,17 +34,40 @@ def validate_git_url(value: str) -> bool: def validate_positive_int(value: str) -> bool: - """Validate if value is a positive integer""" + """Validate if value is a positive integer. 
+ + Args: + value: String to validate as a positive integer + Returns: + True if the value can be converted to a positive integer, False otherwise + """ try: return int(value.strip()) > 0 except (ValueError, AttributeError): return False -def detect_column_order(sample_rows: list, validators: dict) -> dict: - """Detect column order by testing validators against sample data.""" +def detect_column_order( + sample_rows: list[list[str]], validators: dict[str, Callable[[str], bool]] +) -> dict[str, int]: + """Detect column order by testing validators against sample data. + For headerless CSV files, this function determines which column index + corresponds to which expected field by validating sample data against + validator functions. Uses an 80% threshold for matching. + + Args: + sample_rows: List of rows from CSV file, where each row is a list of strings + validators: Dictionary mapping column names to validator functions + + Returns: + Dictionary mapping column names to their detected column indices + + Raises: + ValueError: If column count doesn't match expected validators or if + a column cannot be detected with sufficient confidence + """ if not sample_rows or len(sample_rows[0]) != len(validators): raise ValueError( f"Expected {len(validators)} columns. " @@ -82,14 +115,28 @@ def detect_column_order(sample_rows: list, validators: dict) -> dict: return column_mapping -def process_csv(filename: str, expected_columns: dict) -> list: - """ - Generic CSV processor with header detection. +def process_csv( + filename: str, expected_columns: dict[str, Callable[[str], bool]] +) -> list[dict[str, str]]: + """Generic CSV processor with header detection. + + Processes CSV files with or without headers by automatically detecting the + column order. For files with headers, uses DictReader directly. For headerless + files, detects column order by validating sample data against expected validators. - Uses DictReader for both header and headerless CSVs by detecting column order - and reassigning fieldnames when necessary. + Args: + filename: Path to the CSV file to process + expected_columns: Dictionary mapping column names to validator functions + that check if a value is valid for that column + + Returns: + List of dictionaries, where each dictionary represents a row with + column names as keys and cell values as strings + + Raises: + ValueError: If file is empty, exceeds size limit, has wrong number of + columns, or missing required headers """ - # Validate file size size = os.path.getsize(filename) if size > MAX_FILE_SIZE_BYTES: @@ -99,7 +146,7 @@ def process_csv(filename: str, expected_columns: dict) -> list: f"Consider splitting into smaller batches." ) - rows = [] + rows: list[dict[str, str]] = [] with open(filename, "r", newline="") as f: # Create DictReader - it will auto-read first row as fieldnames @@ -175,9 +222,21 @@ def process_csv(filename: str, expected_columns: dict) -> list: return rows -def process_repo_csv(filename: str) -> list: - """Process repository CSV file with intelligent header detection""" +def process_repo_csv(filename: str) -> list[dict[str, str]]: + """Process repository CSV file with header detection. + + Processes a CSV file containing repository information with columns for + repo_url and repo_group_id. Supports both header and headerless formats. 
+ Args: + filename: Path to the repository CSV file + + Returns: + List of dictionaries with keys 'repo_url' and 'repo_group_id' + + Raises: + ValueError: If file format is invalid or columns cannot be detected + """ return process_csv( filename, expected_columns={ @@ -187,9 +246,21 @@ def process_repo_csv(filename: str) -> list: ) -def process_repo_group_csv(filename: str) -> list: - """Process repository group CSV file with intelligent header detection""" - +def process_repo_group_csv(filename: str) -> list[dict[str, str]]: + """Process repository group CSV file with header detection. + + Processes a CSV file containing repository group information with columns + for repo_group_id and repo_group_name. Supports both header and headerless formats. + + Args: + filename: Path to the repository group CSV file + + Returns: + List of dictionaries with keys 'repo_group_id' and 'repo_group_name' + + Raises: + ValueError: If file format is invalid or columns cannot be detected + """ return process_csv( filename, expected_columns={ diff --git a/augur/application/cli/db.py b/augur/application/cli/db.py index e0df763dab..8d5408eaea 100644 --- a/augur/application/cli/db.py +++ b/augur/application/cli/db.py @@ -44,7 +44,7 @@ def cli(ctx): @test_db_connection @with_database @click.pass_context -def add_repos(ctx, filename): +def add_repos(ctx: click.Context, filename: str) -> None: """Add repositories to Augur's database from a CSV file. The CSV file can have headers (recommended): @@ -56,7 +56,18 @@ def add_repos(ctx, filename): NOTE: The Group ID must already exist in the REPO_Groups Table. - If you want to add an entire GitHub organization, refer to the command: augur db add-github-org""" + Args: + ctx: Click context object containing the database engine + filename: Path to the CSV file containing repository data + + Raises: + ValueError: If CSV file is malformed or exceeds size limit + Exception: For database connection or other unexpected errors + + Note: + If you want to add an entire GitHub organization, refer to the + command: augur db add-github-org + """ from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.util.repo_load_controller import RepoLoadController @@ -121,7 +132,7 @@ def add_repos(ctx, filename): @test_db_connection @with_database @click.pass_context -def get_repo_groups(ctx): +def get_repo_groups(ctx: click.Context) -> pd.DataFrame: """ List all repo groups and their associated IDs """ @@ -144,9 +155,16 @@ def get_repo_groups(ctx): @test_db_connection @with_database @click.pass_context -def add_repo_groups(ctx, filename): - """ - Create new repo groups in Augur's database +def add_repo_groups(ctx: click.Context, filename: str) -> None: + """Create new repo groups in Augur's database from a CSV file. 
+ + Args: + ctx: Click context object containing the database engine + filename: Path to the CSV file containing repository group data + + Raises: + ValueError: If CSV file is malformed or exceeds size limit + Exception: For database connection or other unexpected errors """ try: # Parse CSV (handles headers and column detection) From 9fff7d8ad766b1387b67145165c8af04abd9c069 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Sat, 8 Nov 2025 14:28:11 -0500 Subject: [PATCH 014/104] add unit tests for CSV processing utilities including validation and error handling Signed-off-by: Shlok Gilda --- .../test_cli/test_csv_utils.py | 357 ++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 tests/test_application/test_cli/test_csv_utils.py diff --git a/tests/test_application/test_cli/test_csv_utils.py b/tests/test_application/test_cli/test_csv_utils.py new file mode 100644 index 0000000000..c584345ced --- /dev/null +++ b/tests/test_application/test_cli/test_csv_utils.py @@ -0,0 +1,357 @@ +# SPDX-License-Identifier: MIT +"""Unit tests for CSV processing utilities""" + +import pytest +from unittest.mock import patch + +from augur.application.cli.csv_utils import ( + validate_git_url, + validate_positive_int, + detect_column_order, + process_csv, + process_repo_csv, + process_repo_group_csv, + MAX_FILE_SIZE_BYTES, +) + + +class TestValidateGitUrl: + """Tests for validate_git_url function""" + + def test_valid_github_url(self): + """Test validation of valid GitHub URLs""" + assert validate_git_url("https://github.com/chaoss/augur") + assert validate_git_url("https://github.com/chaoss/augur.git") + assert validate_git_url(" https://github.com/chaoss/augur ") # with whitespace + + def test_valid_gitlab_url(self): + """Test validation of valid GitLab URLs""" + assert validate_git_url("https://gitlab.com/chaoss/augur") + assert validate_git_url("https://gitlab.com/chaoss/augur.git") + + def test_invalid_url(self): + """Test validation of invalid URLs""" + assert not validate_git_url("not-a-url") + assert not validate_git_url("https://example.com") + assert not validate_git_url("123") + assert not validate_git_url("") + + def test_whitespace_handling(self): + """Test that whitespace is properly stripped""" + assert validate_git_url(" https://github.com/chaoss/augur ") + + +class TestValidatePositiveInt: + """Tests for validate_positive_int function""" + + def test_valid_positive_integers(self): + """Test validation of valid positive integers""" + assert validate_positive_int("1") + assert validate_positive_int("42") + assert validate_positive_int("9999") + assert validate_positive_int(" 123 ") # with whitespace + + def test_zero_is_invalid(self): + """Test that zero is not considered a positive integer""" + assert not validate_positive_int("0") + + def test_negative_numbers_invalid(self): + """Test that negative numbers are invalid""" + assert not validate_positive_int("-1") + assert not validate_positive_int("-42") + + def test_non_numeric_invalid(self): + """Test that non-numeric strings are invalid""" + assert not validate_positive_int("abc") + assert not validate_positive_int("12.5") + assert not validate_positive_int("") + assert not validate_positive_int("1a") + + def test_whitespace_handling(self): + """Test that whitespace is properly stripped""" + assert validate_positive_int(" 42 ") + + +class TestDetectColumnOrder: + """Tests for detect_column_order function""" + + def test_simple_column_detection(self): + """Test basic column order detection""" + sample_rows = [ + 
["https://github.com/chaoss/augur", "10"], + ["https://github.com/user/repo", "20"], + ] + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = detect_column_order(sample_rows, validators) + assert result == {"repo_url": 0, "repo_group_id": 1} + + def test_reversed_column_order(self): + """Test detection with reversed column order""" + sample_rows = [ + ["10", "https://github.com/chaoss/augur"], + ["20", "https://github.com/user/repo"], + ] + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = detect_column_order(sample_rows, validators) + assert result == {"repo_url": 1, "repo_group_id": 0} + + def test_threshold_detection(self): + """Test that detection uses 80% threshold correctly""" + # 8 out of 10 rows valid (80% exactly) + sample_rows = [ + ["https://github.com/chaoss/augur", "10"], + ["https://github.com/user/repo1", "20"], + ["https://github.com/user/repo2", "30"], + ["https://github.com/user/repo3", "40"], + ["https://github.com/user/repo4", "50"], + ["https://github.com/user/repo5", "60"], + ["https://github.com/user/repo6", "70"], + ["https://github.com/user/repo7", "80"], + ["invalid-url", "90"], # Invalid + ["also-invalid", "100"], # Invalid + ] + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = detect_column_order(sample_rows, validators) + assert result == {"repo_url": 0, "repo_group_id": 1} + + def test_empty_rows_raises_error(self): + """Test that empty sample rows raises ValueError""" + with pytest.raises(ValueError, match="Expected .* columns"): + detect_column_order([], {"col1": lambda x: True}) + + def test_wrong_column_count_raises_error(self): + """Test that wrong column count raises ValueError""" + sample_rows = [["val1", "val2", "val3"]] + validators = {"col1": lambda x: True, "col2": lambda x: True} + + with pytest.raises(ValueError, match="Expected 2 columns.*Found 3"): + detect_column_order(sample_rows, validators) + + def test_no_match_found_raises_error(self): + """Test that failure to detect a column raises ValueError""" + sample_rows = [ + ["invalid", "invalid"], + ["invalid", "invalid"], + ] + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + with pytest.raises(ValueError, match="Could not detect column"): + detect_column_order(sample_rows, validators) + + +class TestProcessCsv: + """Tests for process_csv function""" + + def test_csv_with_headers(self, tmp_path): + """Test processing CSV file with headers""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("repo_url,repo_group_id\nhttps://github.com/chaoss/augur,10\nhttps://github.com/user/repo,20") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 2 + assert result[0] == {"repo_url": "https://github.com/chaoss/augur", "repo_group_id": "10"} + assert result[1] == {"repo_url": "https://github.com/user/repo", "repo_group_id": "20"} + + def test_csv_without_headers(self, tmp_path): + """Test processing CSV file without headers""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("https://github.com/chaoss/augur,10\nhttps://github.com/user/repo,20") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 2 + assert result[0] == {"repo_url": 
"https://github.com/chaoss/augur", "repo_group_id": "10"} + assert result[1] == {"repo_url": "https://github.com/user/repo", "repo_group_id": "20"} + + def test_csv_with_different_column_order(self, tmp_path): + """Test processing CSV with columns in different order""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("repo_group_id,repo_url\n10,https://github.com/chaoss/augur") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 1 + assert result[0] == {"repo_url": "https://github.com/chaoss/augur", "repo_group_id": "10"} + + def test_empty_csv_raises_error(self, tmp_path): + """Test that empty CSV file raises ValueError""" + csv_file = tmp_path / "empty.csv" + csv_file.write_text("") + + validators = {"col1": lambda x: True} + + with pytest.raises(ValueError, match="empty"): + process_csv(str(csv_file), validators) + + def test_file_size_limit_with_mock(self, tmp_path): + """Test file size limit enforcement using mock""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("repo_url,repo_group_id\nhttps://github.com/chaoss/augur,10") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + # Mock os.path.getsize to return a size larger than limit + with patch('os.path.getsize', return_value=MAX_FILE_SIZE_BYTES + 1): + with pytest.raises(ValueError, match="exceeds.*limit"): + process_csv(str(csv_file), validators) + + def test_missing_required_headers_raises_error(self, tmp_path): + """Test that missing required headers raises ValueError""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("wrong_column,another_column\nvalue1,value2") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + with pytest.raises(ValueError, match="Could not detect column"): + process_csv(str(csv_file), validators) + + def test_whitespace_in_values(self, tmp_path): + """Test that whitespace in values is properly stripped""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("repo_url,repo_group_id\n https://github.com/chaoss/augur , 10 ") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert result[0] == {"repo_url": "https://github.com/chaoss/augur", "repo_group_id": "10"} + + +class TestProcessRepoCsv: + """Tests for process_repo_csv function""" + + def test_process_valid_repo_csv(self, tmp_path): + """Test processing a valid repository CSV""" + csv_file = tmp_path / "repos.csv" + csv_file.write_text("repo_url,repo_group_id\nhttps://github.com/chaoss/augur,10") + + result = process_repo_csv(str(csv_file)) + assert len(result) == 1 + assert result[0]["repo_url"] == "https://github.com/chaoss/augur" + assert result[0]["repo_group_id"] == "10" + + def test_process_repo_csv_without_headers(self, tmp_path): + """Test processing repository CSV without headers""" + csv_file = tmp_path / "repos.csv" + csv_file.write_text("https://github.com/chaoss/augur,10\nhttps://github.com/user/repo,20") + + result = process_repo_csv(str(csv_file)) + assert len(result) == 2 + + +class TestProcessRepoGroupCsv: + """Tests for process_repo_group_csv function""" + + def test_process_valid_repo_group_csv(self, tmp_path): + """Test processing a valid repository group CSV""" + csv_file = tmp_path / "groups.csv" + csv_file.write_text("repo_group_id,repo_group_name\n10,CHAOSS") + + result = 
process_repo_group_csv(str(csv_file)) + assert len(result) == 1 + assert result[0]["repo_group_id"] == "10" + assert result[0]["repo_group_name"] == "CHAOSS" + + def test_process_repo_group_csv_without_headers(self, tmp_path): + """Test processing repository group CSV without headers""" + csv_file = tmp_path / "groups.csv" + csv_file.write_text("10,CHAOSS\n20,OpenSource") + + result = process_repo_group_csv(str(csv_file)) + assert len(result) == 2 + assert result[0]["repo_group_name"] == "CHAOSS" + assert result[1]["repo_group_name"] == "OpenSource" + + def test_empty_group_name_invalid(self, tmp_path): + """Test that empty repository group names are handled""" + csv_file = tmp_path / "groups.csv" + csv_file.write_text("repo_group_id,repo_group_name\n10,ValidName\n20,") + + # This should process the file, but the row with empty name should fail validation + # during the detect_column_order phase if there aren't enough valid rows + result = process_repo_group_csv(str(csv_file)) + # Both rows should be parsed; validation happens at application level + assert len(result) >= 1 + + +class TestEdgeCases: + """Tests for edge cases and error conditions""" + + def test_single_row_csv(self, tmp_path): + """Test processing CSV with single row""" + csv_file = tmp_path / "single.csv" + csv_file.write_text("https://github.com/chaoss/augur,10") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 1 + + def test_csv_with_extra_whitespace_in_headers(self, tmp_path): + """Test CSV with whitespace in header names""" + csv_file = tmp_path / "test.csv" + csv_file.write_text(" repo_url , repo_group_id \nhttps://github.com/chaoss/augur,10") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 1 + assert result[0]["repo_url"] == "https://github.com/chaoss/augur" + + def test_many_rows_csv(self, tmp_path): + """Test processing CSV with many rows""" + csv_file = tmp_path / "many.csv" + lines = ["repo_url,repo_group_id"] + for i in range(100): + lines.append(f"https://github.com/user/repo{i},{i+1}") + csv_file.write_text("\n".join(lines)) + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 100 From 15b2dcc6e7838b30cd360d19cb8f36e1c3def94b Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 12 Nov 2025 13:22:06 -0500 Subject: [PATCH 015/104] move RepoLoadController within the database session context Signed-off-by: Shlok Gilda --- augur/application/db/models/augur_operations.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index bb29c7571a..45ac1d8167 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -566,9 +566,7 @@ def get_group_repo_count(self, group_name, search = None): from augur.util.repo_load_controller import RepoLoadController with DatabaseSession(logger) as session: - controller = RepoLoadController(session) - - result = controller.get_repo_count(source="group", group_name=group_name, user=self, search=search) + result = RepoLoadController(session).get_repo_count(source="group", group_name=group_name, user=self, search=search) return result From 
382e7b7f581833189b3e5dd64b6acb2546942ccd Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Thu, 13 Nov 2025 20:14:19 +0000 Subject: [PATCH 016/104] Add TopicModelEvent ORM model to augur_data.py Migration 36 created the topic_model_event table in the database, but the corresponding SQLAlchemy model was not added to augur_data.py. This caused ORM-level access to the event table to fail. This commit adds the TopicModelEvent class with: - All table columns (event_id, ts, repo_id, model_id, event, level, payload) - Index definitions for ix_tme_repo_ts and ix_tme_event - Foreign key constraints to repo and topic_model_meta tables - Relationship mappings to Repo and TopicModelMeta models This enables the application to query and manipulate topic modeling events through the ORM layer. Related: augur/application/schema/alembic/versions/36_add_topic_model_event.py Signed-off-by: Xiaoha --- augur/application/db/models/augur_data.py | 55 +++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index ddf11e0532..921f9f5336 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -3705,3 +3705,58 @@ class TopicModelMeta(Base): ) repo = relationship("Repo") + + +class TopicModelEvent(Base): + __tablename__ = "topic_model_event" + __table_args__ = ( + Index("ix_tme_repo_ts", "repo_id", "ts"), + Index("ix_tme_event", "event"), + {"schema": "augur_data"} + ) + + event_id = Column( + BigInteger, + primary_key=True, + comment="Unique identifier for the event" + ) + ts = Column( + TIMESTAMP(timezone=True), + nullable=False, + server_default=text("CURRENT_TIMESTAMP"), + comment="Timestamp when the event occurred" + ) + repo_id = Column( + ForeignKey("augur_data.repo.repo_id", name="fk_tme_repo_id"), + nullable=True, + comment="Repository associated with this event" + ) + model_id = Column( + UUID(as_uuid=True), + ForeignKey( + "augur_data.topic_model_meta.model_id", + name="fk_tme_model_id", + ondelete="SET NULL" + ), + nullable=True, + comment="Topic model associated with this event" + ) + event = Column( + Text, + nullable=False, + comment="Event type or name" + ) + level = Column( + Text, + nullable=False, + server_default=text("'INFO'"), + comment="Log level (INFO, WARNING, ERROR, etc.)" + ) + payload = Column( + JSONB, + nullable=False, + comment="Event payload data" + ) + + repo = relationship("Repo") + topic_model = relationship("TopicModelMeta") From 2f83a2e05e2453f86011ff284cf698cf131c4a22 Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Thu, 13 Nov 2025 20:22:18 +0000 Subject: [PATCH 017/104] Add explicit Integer type to repo_id column Ensure repo_id column type matches migration definition (sa.Integer) for complete schema consistency between ORM and database. 
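With the TopicModelEvent mapping declared in the previous commit and its repo_id type pinned down here, topic-model events can be read through the ORM. A minimal query sketch follows; it is illustrative only, the helper name and the import path are assumptions, and it presumes a SQLAlchemy session such as the DatabaseSession Augur already provides.

# Hypothetical helper (not part of these patches): fetch the newest
# topic-model events recorded for a single repository via the ORM.
from sqlalchemy import select

from augur.application.db.models.augur_data import TopicModelEvent  # assumed import path


def recent_topic_model_events(session, repo_id, limit=20):
    """Return up to `limit` of the most recent topic_model_event rows for a repo."""
    stmt = (
        select(TopicModelEvent)
        .where(TopicModelEvent.repo_id == repo_id)
        .order_by(TopicModelEvent.ts.desc())
        .limit(limit)
    )
    return session.scalars(stmt).all()

Filtering on repo_id and ordering by ts also lines up with the ix_tme_repo_ts index the model declares.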
Signed-off-by: Xiaoha --- augur/application/db/models/augur_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 921f9f5336..034a2bec01 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -3727,6 +3727,7 @@ class TopicModelEvent(Base): comment="Timestamp when the event occurred" ) repo_id = Column( + Integer, ForeignKey("augur_data.repo.repo_id", name="fk_tme_repo_id"), nullable=True, comment="Repository associated with this event" From 6a406fd929abc574ea98a41edc06d9ebc8e0a63a Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 14 Nov 2025 17:11:45 -0500 Subject: [PATCH 018/104] Create a migration to synchronize the topic model tables Signed-off-by: Adrian Edwards --- .../37_sync_topic_model_migrations.py | 361 ++++++++++++++++++ 1 file changed, 361 insertions(+) create mode 100644 augur/application/schema/alembic/versions/37_sync_topic_model_migrations.py diff --git a/augur/application/schema/alembic/versions/37_sync_topic_model_migrations.py b/augur/application/schema/alembic/versions/37_sync_topic_model_migrations.py new file mode 100644 index 0000000000..6a076a750d --- /dev/null +++ b/augur/application/schema/alembic/versions/37_sync_topic_model_migrations.py @@ -0,0 +1,361 @@ +"""sync topic model migrations because Revisions 35 and 36 did not perfectly match their associated SQLAlchemy class models. + +Revision ID: 37 +Revises: 36 +Create Date: 2025-11-14 17:09:14.156057 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '37' +down_revision = '36' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column('topic_model_event', 'event_id', + existing_type=sa.BIGINT(), + comment='Unique identifier for the event', + existing_nullable=False, + autoincrement=True, + schema='augur_data') + op.alter_column('topic_model_event', 'ts', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment='Timestamp when the event occurred', + existing_nullable=False, + existing_server_default=sa.text('CURRENT_TIMESTAMP'), + schema='augur_data') + op.alter_column('topic_model_event', 'repo_id', + existing_type=sa.INTEGER(), + comment='Repository associated with this event', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_event', 'model_id', + existing_type=sa.UUID(), + comment='Topic model associated with this event', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_event', 'event', + existing_type=sa.TEXT(), + comment='Event type or name', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_event', 'level', + existing_type=sa.TEXT(), + comment='Log level (INFO, WARNING, ERROR, etc.)', + existing_nullable=False, + existing_server_default=sa.text("'INFO'::text"), + schema='augur_data') + op.alter_column('topic_model_event', 'payload', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment='Event payload data', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_id', + existing_type=sa.UUID(), + comment='Unique identifier for the topic model', + existing_nullable=False, + existing_server_default=sa.text('gen_random_uuid()'), + schema='augur_data') + op.alter_column('topic_model_meta', 'repo_id', + existing_type=sa.INTEGER(), + type_=sa.BigInteger(), + comment='Repository this model was trained on', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_method', + existing_type=sa.VARCHAR(), + comment="Method used for topic modeling (e.g., 'NMF_COUNT', 'LDA_TFIDF')", + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'num_topics', + existing_type=sa.INTEGER(), + comment='Number of topics in the model', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'num_words_per_topic', + existing_type=sa.INTEGER(), + comment='Number of words per topic', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'training_parameters', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + type_=sa.JSON(), + comment='JSON object containing training parameters', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_file_paths', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + type_=sa.JSON(), + comment='JSON object containing paths to model artifacts', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'parameters_hash', + existing_type=sa.VARCHAR(), + comment='Hash of parameters for deduplication', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'coherence_score', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment='Coherence score of the model', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + op.alter_column('topic_model_meta', 'perplexity_score', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment='Perplexity score of the model', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + 
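+    # The remaining alter_column calls repeat the same pattern for the rest of
+    # the topic_model_meta columns: attach the column comments declared on the
+    # ORM model, switch JSONB columns to the generic sa.JSON type, and relax
+    # the standard metadata columns to nullable so the table definition lines
+    # up with the SQLAlchemy models.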
op.alter_column('topic_model_meta', 'topic_diversity', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment='Topic diversity score', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + op.alter_column('topic_model_meta', 'quality', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + type_=sa.JSON(), + comment='Quality metrics', + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + schema='augur_data') + op.alter_column('topic_model_meta', 'training_message_count', + existing_type=sa.BIGINT(), + comment='Number of messages used for training', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'data_fingerprint', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + type_=sa.JSON(), + comment='Fingerprint of training data', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'visualization_data', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + type_=sa.JSON(), + comment='JSON object containing visualization data for the model', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_meta', 'training_start_time', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment='When training started', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'training_end_time', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment='When training ended', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'tool_source', + existing_type=sa.VARCHAR(), + nullable=True, + comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'tool_version', + existing_type=sa.VARCHAR(), + nullable=True, + comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'data_source', + existing_type=sa.VARCHAR(), + nullable=True, + comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'data_collection_date', + existing_type=postgresql.TIMESTAMP(timezone=True), + nullable=True, + existing_server_default=sa.text('CURRENT_TIMESTAMP'), + schema='augur_data') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column('topic_model_meta', 'data_collection_date', + existing_type=postgresql.TIMESTAMP(timezone=True), + nullable=False, + existing_server_default=sa.text('CURRENT_TIMESTAMP'), + schema='augur_data') + op.alter_column('topic_model_meta', 'data_source', + existing_type=sa.VARCHAR(), + nullable=False, + comment=None, + existing_comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'tool_version', + existing_type=sa.VARCHAR(), + nullable=False, + comment=None, + existing_comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'tool_source', + existing_type=sa.VARCHAR(), + nullable=False, + comment=None, + existing_comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'training_end_time', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment=None, + existing_comment='When training ended', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'training_start_time', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment=None, + existing_comment='When training started', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'visualization_data', + existing_type=sa.JSON(), + type_=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='JSON object containing visualization data for the model', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_meta', 'data_fingerprint', + existing_type=sa.JSON(), + type_=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='Fingerprint of training data', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'training_message_count', + existing_type=sa.BIGINT(), + comment=None, + existing_comment='Number of messages used for training', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'quality', + existing_type=sa.JSON(), + type_=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='Quality metrics', + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + schema='augur_data') + op.alter_column('topic_model_meta', 'topic_diversity', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment='Topic diversity score', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + op.alter_column('topic_model_meta', 'perplexity_score', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment='Perplexity score of the model', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + op.alter_column('topic_model_meta', 'coherence_score', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment='Coherence score of the model', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + op.alter_column('topic_model_meta', 'parameters_hash', + existing_type=sa.VARCHAR(), + comment=None, + existing_comment='Hash of parameters for deduplication', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_file_paths', + existing_type=sa.JSON(), + type_=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='JSON object containing paths to model artifacts', + existing_nullable=False, + schema='augur_data') + 
op.alter_column('topic_model_meta', 'training_parameters', + existing_type=sa.JSON(), + type_=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='JSON object containing training parameters', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'num_words_per_topic', + existing_type=sa.INTEGER(), + comment=None, + existing_comment='Number of words per topic', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'num_topics', + existing_type=sa.INTEGER(), + comment=None, + existing_comment='Number of topics in the model', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_method', + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Method used for topic modeling (e.g., 'NMF_COUNT', 'LDA_TFIDF')", + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'repo_id', + existing_type=sa.BigInteger(), + type_=sa.INTEGER(), + comment=None, + existing_comment='Repository this model was trained on', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_id', + existing_type=sa.UUID(), + comment=None, + existing_comment='Unique identifier for the topic model', + existing_nullable=False, + existing_server_default=sa.text('gen_random_uuid()'), + schema='augur_data') + op.alter_column('topic_model_event', 'payload', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='Event payload data', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_event', 'level', + existing_type=sa.TEXT(), + comment=None, + existing_comment='Log level (INFO, WARNING, ERROR, etc.)', + existing_nullable=False, + existing_server_default=sa.text("'INFO'::text"), + schema='augur_data') + op.alter_column('topic_model_event', 'event', + existing_type=sa.TEXT(), + comment=None, + existing_comment='Event type or name', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_event', 'model_id', + existing_type=sa.UUID(), + comment=None, + existing_comment='Topic model associated with this event', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_event', 'repo_id', + existing_type=sa.INTEGER(), + comment=None, + existing_comment='Repository associated with this event', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_event', 'ts', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment=None, + existing_comment='Timestamp when the event occurred', + existing_nullable=False, + existing_server_default=sa.text('CURRENT_TIMESTAMP'), + schema='augur_data') + op.alter_column('topic_model_event', 'event_id', + existing_type=sa.BIGINT(), + comment=None, + existing_comment='Unique identifier for the event', + existing_nullable=False, + autoincrement=True, + schema='augur_data') + # ### end Alembic commands ### From 17963e42c3f91d795e3766ed1511a2b0dd88d985 Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Sat, 15 Nov 2025 10:32:19 +0530 Subject: [PATCH 019/104] Fix: collection_intervals into seconds Signed-off-by: PredictiveManish --- docs/source/getting-started/collecting-data.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/getting-started/collecting-data.rst b/docs/source/getting-started/collecting-data.rst index cb21922047..7c75097fd5 100644 --- a/docs/source/getting-started/collecting-data.rst +++ 
b/docs/source/getting-started/collecting-data.rst @@ -97,7 +97,10 @@ The celery monitor is responsible for generating the tasks that will tell the ot - ``refresh_materialized_views_interval_in_days``, number of days to wait between refreshes of materialized views. If you choose, you can also adjust the values in the ``Tasks`` block if you would like to control when tasks should be re-run on a given repository. -This is specified as a number of days since the last successful run. + +- ``collection_interval``, the interval (in seconds) at which the collection monitor task runs to schedule new collection jobs. This is different from the other interval values which use days. + +- ``core_collection_interval_days``, ``secondary_collection_interval_days``, ``facade_collection_interval_days``, and ``ml_collection_interval_days``, which specify the number of days since the last successful run before a task should be re-run on a given repository. Adding repos for collection ----------------------------- From a9694236473f804b76323cf3dd3f5f7e740e9905 Mon Sep 17 00:00:00 2001 From: Adeeba Nizam Date: Mon, 10 Nov 2025 02:40:48 +0530 Subject: [PATCH 020/104] docs: move contributor lists to CONTRIBUTORS.md and update README for clarity Signed-off-by: Adeeba Nizam --- CONTRIBUTORS.md | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 66 ++----------------------------------- 2 files changed, 89 insertions(+), 64 deletions(-) create mode 100644 CONTRIBUTORS.md diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md new file mode 100644 index 0000000000..a898578fd0 --- /dev/null +++ b/CONTRIBUTORS.md @@ -0,0 +1,87 @@ +# Contributors & Participants + +This file contains full attribution lists for: +- Current maintainers +- Founding Maintainers +- Former maintainers +- Contributors +- Google Summer of Code participants (by year) + +--- +## Current Maintainers +- Sean P. Goggins — [@sgoggins](https://github.com/sgoggins) +- Adrian Edwards — [@MoralCode](https://github.com/MoralCode) +- Andrew Brain — [@ABrain7710](https://github.com/ABrain7710) +- Isaac Milarsky — [@IsaacMilarky](https://github.com/IsaacMilarky) +- John McGinnis — [@Ulincys](https://github.com/Ulincsys) + +--- + +## Founding Maintainers +- Derek Howard — [@howderek](https://github.com/howderek) + +## Former Maintainers +- Carter Landis — [@ccarterlandis](https://github.com/ccarterlandis) +- Gabe Heim — [@gabe-heim](https://github.com/gabe-heim) +- Matt Snell — [@Nebrethar](https://github.com/Nebrethar) +- Christian Cmehil-Warn — [@christiancme](https://github.com/christiancme) +- Jonah Zukosky — [@jonahz5222](https://github.com/jonahz5222) +- Carolyn Perniciaro — [@CMPerniciaro](https://github.com/CMPerniciaro) +- Elita Nelson — [@ElitaNelson](https://github.com/ElitaNelson) +- Michael Woodruff — [@michaelwoodruffdev](https://github.com/michaelwoodruffdev/) +- Max Balk — [@maxbalk](https://github.com/maxbalk/) + +--- + +## Contributors +- [Dawn Foster](https://github.com/geekygirldawn) +- [Ivana Atanasova](https://github.com/ivanayov) +- [Georg J.P. Link](https://github.com/GeorgLink) +- [Gary P. 
White](https://github.com/garypwhite) + +--- + +## GSoC 2025 Participants +- [Akshat Baranwal](https://github.com/akshatb2006) +- [Asish Kumar](https://github.com/officialasishkumar) +- [Jiahong Lin](https://github.com/xiaoha-cloud) + +--- + +## GSoC 2022 Participants +- [Kaxada](https://github.com/kaxada) +- [Mabel F](https://github.com/mabelbot) +- [Priya Srivastava](https://github.com/Priya730) +- [Ramya Kappagantu](https://github.com/RamyaKappagantu) +- [Yash Prakash](https://gist.github.com/yash-yp) + +--- + +## GSoC 2021 Participants +- [Dhruv Sachdev](https://github.com/Dhruv-Sachdev1313) +- [Rashmi K A](https://github.com/Rashmi-K-A) +- [Yash Prakash](https://gist.github.com/yash-yp) +- [Anuj Lamoria](https://github.com/anujlamoria) +- [Yeming Gu](https://github.com/gymgym1212) +- [Ritik Malik](https://gist.github.com/ritik-malik) + +--- + +## GSoC 2020 Participants +- [Akshara P](https://github.com/aksh555) +- [Tianyi Zhou](https://github.com/tianyichow) +- [Pratik Mishra](https://github.com/pratikmishra356) +- [Sarit Adhikari](https://github.com/sarit-adh) +- [Saicharan Reddy](https://github.com/mrsaicharan1) +- [Abhinav Bajpai](https://github.com/abhinavbajpai2012) + +--- + +## GSoC 2019 Participants +- [Bingwen Ma](https://github.com/bing0n3) +- [Parth Sharma](https://github.com/parthsharma2) + +--- + +## GSoC 2018 Participants +- [Keanu Nichols](https://github.com/kmn5409) diff --git a/README.md b/README.md index bac449c3d8..fef4c26622 100644 --- a/README.md +++ b/README.md @@ -90,68 +90,6 @@ Augur is free software: you can redistribute it and/or modify it under the terms This work has been funded through the Alfred P. Sloan Foundation, Mozilla, The Reynolds Journalism Institute, contributions from VMWare, Red Hat Software, Grace Hopper's Open Source Day, GitHub, Microsoft, Twitter, Adobe, the Gluster Project, Open Source Summit (NA/Europe), and the Linux Foundation Compliance Summit. Significant design contributors include Kate Stewart, Dawn Foster, Duane O'Brien, Remy Decausemaker, others omitted due to the memory limitations of project maintainers, and 15 Google Summer of Code Students. +## Maintainers & Contributors -Current maintainers --------------------- -- `Derek Howard `_ -- `Andrew Brain `_ -- `Isaac Milarsky `_ -- `John McGinnis `_ -- `Sean P. Goggins `_ - -Former maintainers --------------------- -- `Carter Landis `_ -- `Gabe Heim `_ -- `Matt Snell `_ -- `Christian Cmehil-Warn `_ -- `Jonah Zukosky `_ -- `Carolyn Perniciaro `_ -- `Elita Nelson `_ -- `Michael Woodruff `_ -- `Max Balk `_ - -Contributors --------------------- -- `Dawn Foster `_ -- `Ivana Atanasova `_ -- `Georg J.P. Link `_ -- `Gary P White `_ - -GSoC 2025 Participants ------------------------ - -GSoC 2022 participants ------------------------ -- `Kaxada `_ -- `Mabel F `_ -- `Priya Srivastava `_ -- `Ramya Kappagantu `_ -- `Yash Prakash `_ - -GSoC 2021 participants ------------------------ -- `Dhruv Sachdev `_ -- `Rashmi K A `_ -- `Yash Prakash `_ -- `Anuj Lamoria `_ -- `Yeming Gu `_ -- `Ritik Malik `_ - -GSoC 2020 participants ------------------------ -- `Akshara P `_ -- `Tianyi Zhou `_ -- `Pratik Mishra `_ -- `Sarit Adhikari `_ -- `Saicharan Reddy `_ -- `Abhinav Bajpai `_ - -GSoC 2019 participants ------------------------ -- `Bingwen Ma `_ -- `Parth Sharma `_ - -GSoC 2018 participants ------------------------ -- `Keanu Nichols `_ +Refer to [CONTRIBUTORS.md](./CONTRIBUTORS.md) for detailed information about project maintainers, contributors, and GSoC participants. 
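The interval documentation added in PATCH 019/104 above describes plain configuration keys, so a small illustrative sketch of a ``Tasks`` block may help. The key names are taken from that documentation change; the numeric values are placeholders rather than Augur's shipped defaults, and grouping the day-based keys under ``Tasks`` is an assumption:

    "Tasks": {
        "collection_interval": 60,
        "core_collection_interval_days": 1,
        "secondary_collection_interval_days": 1,
        "facade_collection_interval_days": 7,
        "ml_collection_interval_days": 7
    }

Read this as: the collection monitor wakes every 60 seconds to schedule new jobs, while each repository is only re-collected once the relevant number of days has passed since its last successful run.
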
From 95b2b7835298b9613f1a83d35b7720925dea1733 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 11:40:10 -0500 Subject: [PATCH 021/104] connect up the url in another place to prevent errors about a missing config file Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index b6be9dee05..6949acd16f 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -5,6 +5,8 @@ from augur.application.db.models.base import Base from augur.application.db.engine import get_database_string from sqlalchemy import create_engine, event +import os + # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -26,6 +28,9 @@ # my_important_option = config.get_main_option("my_important_option") # ... etc. +# possibly swap sqlalchemy.url with AUGUR_DB env var too + +sqlalchemy_url = os.getenv("AUGUR_DB") or config.get_main_option("sqlalchemy.url") def run_migrations_offline(): """Run migrations in 'offline' mode. @@ -39,9 +44,8 @@ def run_migrations_offline(): script output. """ - url = config.get_main_option("sqlalchemy.url") context.configure( - url=url, + url=sqlalchemy_url, target_metadata=target_metadata, literal_binds=True, dialect_opts={"paramstyle": "named"}, @@ -58,7 +62,7 @@ def run_migrations_online(): and associate a connection with the context. """ - url = get_database_string() + url = sqlalchemy_url engine = create_engine(url) @event.listens_for(engine, "connect", insert=True) From d78f5dceb4f35218e9d995b7d24327b703addd8f Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 11:40:19 -0500 Subject: [PATCH 022/104] install python-dotenv Signed-off-by: Adrian Edwards --- pyproject.toml | 1 + uv.lock | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 801ac54574..264aff98f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ dev = [ { include-group = "test" }, { include-group = "debug" }, { include-group = "docs" }, + "python-dotenv>=1.2.1", ] lint = [ "pylint", diff --git a/uv.lock b/uv.lock index 37df99ba32..ca3f330ff8 100644 --- a/uv.lock +++ b/uv.lock @@ -224,6 +224,7 @@ dev = [ { name = "mypy" }, { name = "pylint" }, { name = "pytest" }, + { name = "python-dotenv" }, { name = "setuptools" }, { name = "sphinx" }, { name = "sphinx-rtd-theme" }, @@ -346,6 +347,7 @@ dev = [ { name = "mypy", specifier = ">=1.18.2" }, { name = "pylint" }, { name = "pytest" }, + { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "setuptools" }, { name = "sphinx", specifier = "==7.2.6" }, { name = "sphinx-rtd-theme", specifier = "==2.0.0" }, @@ -2782,6 +2784,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = "python-dotenv" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = 
"2025-10-26T15:12:10.434Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, +] + [[package]] name = "python-http-client" version = "3.3.7" From 8f7368f625f811f8d6411de4edb5ede8b6343ffe Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 12:09:35 -0500 Subject: [PATCH 023/104] load from .env Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 6949acd16f..664f96afd2 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -5,8 +5,10 @@ from augur.application.db.models.base import Base from augur.application.db.engine import get_database_string from sqlalchemy import create_engine, event +from dotenv import load_dotenv import os +load_dotenv() # this is the Alembic Config object, which provides # access to the values within the .ini file in use. From 972303a4aa1c8c95f709d1ed665cf388a017184f Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 12:10:21 -0500 Subject: [PATCH 024/104] remove date from migration filename format Signed-off-by: Adrian Edwards --- alembic.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alembic.ini b/alembic.ini index c8976b991b..c36965bb80 100644 --- a/alembic.ini +++ b/alembic.ini @@ -5,7 +5,7 @@ script_location = augur/application/schema/alembic # template used to generate migration files -file_template = %%(year)d-%%(month).2d-%%(day).2d_%%(rev)s_%%(slug)s +file_template = %%(rev)s_%%(slug)s # sys.path path, will be prepended to sys.path if present. # defaults to the current working directory. 
From 0e54842c242dcb92ae9c952be21f467016ccd6ba Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 13:21:26 -0500 Subject: [PATCH 025/104] set up alembic to automatically determine the next version number Generated-by: gpt-5 via cursor Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 664f96afd2..43b001ddaf 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -7,6 +7,8 @@ from sqlalchemy import create_engine, event from dotenv import load_dotenv import os +import re +from pathlib import Path load_dotenv() @@ -34,6 +36,34 @@ sqlalchemy_url = os.getenv("AUGUR_DB") or config.get_main_option("sqlalchemy.url") + +VERSIONS_DIR = Path(__file__).parent / "versions" + +def _next_int_rev() -> str: + max_rev = 0 + for p in VERSIONS_DIR.glob("*.py"): + try: + txt = p.read_text(encoding="utf-8") + except Exception: + continue + m = re.search(r"^revision\s*=\s*['\"]([^'\"]+)['\"]", txt, re.M) + if m and m.group(1).isdigit(): + max_rev = max(max_rev, int(m.group(1))) + return str(max_rev + 1) + +def process_revision_directives(context, revision, directives): + if not directives: + return + script = directives[0] + # If user passed --rev-id, honor it; otherwise override Alembic's default + opts = getattr(context.config, "cmd_opts", None) + user_rev_id = getattr(opts, "rev_id", None) + if user_rev_id: + script.rev_id = str(user_rev_id) + else: + script.rev_id = _next_int_rev() + + def run_migrations_offline(): """Run migrations in 'offline' mode. @@ -51,6 +81,7 @@ def run_migrations_offline(): target_metadata=target_metadata, literal_binds=True, dialect_opts={"paramstyle": "named"}, + process_revision_directives=process_revision_directives, ) with context.begin_transaction(): @@ -84,6 +115,7 @@ def set_search_path(dbapi_connection, connection_record): version_table_schema=target_metadata.schema, include_schemas=True, compare_type=True, + process_revision_directives=process_revision_directives, ) with context.begin_transaction(): From 28fb8397dd5159a76805e22d3e4e8197b9d3e3c8 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 13:29:09 -0500 Subject: [PATCH 026/104] replace file contents-based revision check with one that just looks at the filenames Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 43b001ddaf..95f5bd4270 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -42,11 +42,8 @@ def _next_int_rev() -> str: max_rev = 0 for p in VERSIONS_DIR.glob("*.py"): - try: - txt = p.read_text(encoding="utf-8") - except Exception: - continue - m = re.search(r"^revision\s*=\s*['\"]([^'\"]+)['\"]", txt, re.M) + pathname = Path(p).name + m = re.search(r"^([\d]+)_[a-zA-Z0-9_]+.py", pathname, re.M) if m and m.group(1).isdigit(): max_rev = max(max_rev, int(m.group(1))) return str(max_rev + 1) From e286f8f535e958cefc05e4bbcf7f7295c0e5f6c2 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 14:11:03 -0500 Subject: [PATCH 027/104] =?UTF-8?q?remove=20search=20paths=20"It=20can=20m?= =?UTF-8?q?ake=20reflection=20=E2=80=9Close=E2=80=9D=20schema=20names.=20R?= 
=?UTF-8?q?emove=20the=20connect=20listener=20that=20sets=20search=5Fpath?= =?UTF-8?q?=20while=20generating=20migrations."=20-=20gpt5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assisted-by: GPT5 via cursor Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 95f5bd4270..f30febbb95 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -95,14 +95,6 @@ def run_migrations_online(): url = sqlalchemy_url engine = create_engine(url) - @event.listens_for(engine, "connect", insert=True) - def set_search_path(dbapi_connection, connection_record): - existing_autocommit = dbapi_connection.autocommit - dbapi_connection.autocommit = True - cursor = dbapi_connection.cursor() - cursor.execute("SET SESSION search_path=public,augur_data,augur_operations,spdx") - cursor.close() - dbapi_connection.autocommit = existing_autocommit with engine.connect() as connection: From ec7793da5e683395b732e58a7def4547176c05fe Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 14:53:32 -0500 Subject: [PATCH 028/104] update alembic Signed-off-by: Adrian Edwards --- pyproject.toml | 4 ++-- uv.lock | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 264aff98f4..ffb17b99d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ classifiers = [ "Programming Language :: Python :: 3.10" ] dependencies = [ - "alembic==1.8.1", + "alembic>=1.17.1", "Beaker==1.11.0", "blinker==1.4", "bokeh==2.0.2", @@ -86,7 +86,7 @@ dependencies = [ "toml", "toolz>=0.8.2", "tornado==6.4.1", - "typing-extensions==4.7.1", + "typing-extensions>=4.7", "Werkzeug~=2.0.0", "xgboost==3.0.2", "xlrd==2.0.1", diff --git a/uv.lock b/uv.lock index ca3f330ff8..84bcaef4e6 100644 --- a/uv.lock +++ b/uv.lock @@ -30,15 +30,17 @@ wheels = [ [[package]] name = "alembic" -version = "1.8.1" +version = "1.17.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mako" }, { name = "sqlalchemy" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/37/ab/80e6d86ca81235ea1a7104089dddf74de4b45f8af0a05d4b265be44d6ff9/alembic-1.8.1.tar.gz", hash = "sha256:cd0b5e45b14b706426b833f06369b9a6d5ee03f826ec3238723ce8caaf6e5ffa", size = 1255927, upload-time = "2022-07-13T14:18:50.766Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6e/b6/2a81d7724c0c124edc5ec7a167e85858b6fd31b9611c6fb8ecf617b7e2d3/alembic-1.17.1.tar.gz", hash = "sha256:8a289f6778262df31571d29cca4c7fbacd2f0f582ea0816f4c399b6da7528486", size = 1981285, upload-time = "2025-10-29T00:23:16.667Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/c8/69600a8138a56794713ecdb8b75b14fbe32a410bc444683f27dbab93c0ca/alembic-1.8.1-py3-none-any.whl", hash = "sha256:0a024d7f2de88d738d7395ff866997314c837be6104e90c5724350313dee4da4", size = 209845, upload-time = "2022-07-13T14:18:53.415Z" }, + { url = "https://files.pythonhosted.org/packages/a5/32/7df1d81ec2e50fb661944a35183d87e62d3f6c6d9f8aff64a4f245226d55/alembic-1.17.1-py3-none-any.whl", hash = "sha256:cbc2386e60f89608bb63f30d2d6cc66c7aaed1fe105bd862828600e5ad167023", size = 247848, upload-time = "2025-10-29T00:23:18.79Z" }, ] [[package]] @@ -265,7 +267,7 @@ test = [ 
[package.metadata] requires-dist = [ - { name = "alembic", specifier = "==1.8.1" }, + { name = "alembic", specifier = ">=1.17.1" }, { name = "beaker", specifier = "==1.11.0" }, { name = "blinker", specifier = "==1.4" }, { name = "bokeh", specifier = "==2.0.2" }, @@ -332,7 +334,7 @@ requires-dist = [ { name = "toml" }, { name = "toolz", specifier = ">=0.8.2" }, { name = "tornado", specifier = "==6.4.1" }, - { name = "typing-extensions", specifier = "==4.7.1" }, + { name = "typing-extensions", specifier = ">=4.7" }, { name = "werkzeug", specifier = "~=2.0.0" }, { name = "xgboost", specifier = "==3.0.2" }, { name = "xlrd", specifier = "==2.0.1" }, @@ -4076,11 +4078,11 @@ wheels = [ [[package]] name = "typing-extensions" -version = "4.7.1" +version = "4.15.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3c/8b/0111dd7d6c1478bf83baa1cab85c686426c7a6274119aceb2bd9d35395ad/typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2", size = 72876, upload-time = "2023-07-02T14:20:55.045Z" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/6b/63cc3df74987c36fe26157ee12e09e8f9db4de771e0f3404263117e75b95/typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36", size = 33232, upload-time = "2023-07-02T14:20:53.275Z" }, + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] [[package]] From 7b18880c5ffd927260ee60fbf5ac014acd58cfd2 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 14:55:06 -0500 Subject: [PATCH 029/104] include schemas/be schema-aware in offline version of migrations too Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index f30febbb95..2b6fa91f8d 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -79,6 +79,7 @@ def run_migrations_offline(): literal_binds=True, dialect_opts={"paramstyle": "named"}, process_revision_directives=process_revision_directives, + include_schemas=True, ) with context.begin_transaction(): From 0f7da8e3372576ee68a8008466659b8e2b5537fb Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 15:35:21 -0500 Subject: [PATCH 030/104] use the public schema by default for the version table schema. 
Trying to explicitly set it causes the version table to get dropped as part of the next generated automatic migration Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 1 - 1 file changed, 1 deletion(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 2b6fa91f8d..827492f7a2 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -102,7 +102,6 @@ def run_migrations_online(): context.configure( connection=connection, target_metadata=target_metadata, - version_table_schema=target_metadata.schema, include_schemas=True, compare_type=True, process_revision_directives=process_revision_directives, From 31bd7f447c51a2627eb313e8856e69ebdcf07a3e Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 18 Nov 2025 14:56:53 -0500 Subject: [PATCH 031/104] remove unused imports per reviewdog Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 827492f7a2..bf2993c4b1 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -3,8 +3,7 @@ from alembic import context from augur.application.db.models.base import Base -from augur.application.db.engine import get_database_string -from sqlalchemy import create_engine, event +from sqlalchemy import create_engine from dotenv import load_dotenv import os import re From 361dbf854da15a726f0587449a30b6143360b037 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 18 Nov 2025 15:32:40 -0500 Subject: [PATCH 032/104] python-dotenv is not just a dev dependency Signed-off-by: Adrian Edwards --- pyproject.toml | 2 +- uv.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ffb17b99d8..908558f239 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ dependencies = [ "psycopg2-binary==2.9.9", "pylint==2.15.5", "python-crfsuite>=0.9.8", + "python-dotenv>=1.2.1", "pyYaml", "redis==4.3.3", "requests==2.32.0", @@ -99,7 +100,6 @@ dev = [ { include-group = "test" }, { include-group = "debug" }, { include-group = "docs" }, - "python-dotenv>=1.2.1", ] lint = [ "pylint", diff --git a/uv.lock b/uv.lock index 84bcaef4e6..819bc2be00 100644 --- a/uv.lock +++ b/uv.lock @@ -187,6 +187,7 @@ dependencies = [ { name = "psycopg2-binary" }, { name = "pylint" }, { name = "python-crfsuite" }, + { name = "python-dotenv" }, { name = "pyyaml" }, { name = "redis" }, { name = "requests" }, @@ -226,7 +227,6 @@ dev = [ { name = "mypy" }, { name = "pylint" }, { name = "pytest" }, - { name = "python-dotenv" }, { name = "setuptools" }, { name = "sphinx" }, { name = "sphinx-rtd-theme" }, @@ -312,6 +312,7 @@ requires-dist = [ { name = "psycopg2-binary", specifier = "==2.9.9" }, { name = "pylint", specifier = "==2.15.5" }, { name = "python-crfsuite", specifier = ">=0.9.8" }, + { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "pyyaml" }, { name = "redis", specifier = "==4.3.3" }, { name = "requests", specifier = "==2.32.0" }, @@ -349,7 +350,6 @@ dev = [ { name = "mypy", specifier = ">=1.18.2" }, { name = "pylint" }, { name = "pytest" }, - { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "setuptools" }, { name = "sphinx", specifier = "==7.2.6" }, { name = "sphinx-rtd-theme", specifier = "==2.0.0" }, From 21a02b222f4474f3bedbb44acacfc811426a5d07 Mon Sep 17 00:00:00 2001 From: 
Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Tue, 18 Nov 2025 18:41:43 -0500 Subject: [PATCH 033/104] Update John's name Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- CONTRIBUTORS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index a898578fd0..8599944b00 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -13,7 +13,7 @@ This file contains full attribution lists for: - Adrian Edwards — [@MoralCode](https://github.com/MoralCode) - Andrew Brain — [@ABrain7710](https://github.com/ABrain7710) - Isaac Milarsky — [@IsaacMilarky](https://github.com/IsaacMilarky) -- John McGinnis — [@Ulincys](https://github.com/Ulincsys) +- John McGinness — [@Ulincys](https://github.com/Ulincsys) --- @@ -85,3 +85,4 @@ This file contains full attribution lists for: ## GSoC 2018 Participants - [Keanu Nichols](https://github.com/kmn5409) + From f33054b13f8f0d46cae2e9dc07e644a836a9dea8 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Sun, 16 Nov 2025 21:52:20 -0500 Subject: [PATCH 034/104] refactor DEI and user CLI functions to use context managers for database sessions and improve error handling Signed-off-by: Shlok Gilda --- augur/api/routes/dei.py | 155 ++++++++++++++++++---------------- augur/application/cli/user.py | 51 +++++------ 2 files changed, 108 insertions(+), 98 deletions(-) diff --git a/augur/api/routes/dei.py b/augur/api/routes/dei.py index 646081ba2c..44fe014615 100644 --- a/augur/api/routes/dei.py +++ b/augur/api/routes/dei.py @@ -12,7 +12,7 @@ from augur.application.db.models import ClientApplication, CollectionStatus, Repo, RepoGroup, BadgingDEI from augur.application.db.session import DatabaseSession -from augur.tasks.util.collection_util import CollectionRequest,AugurTaskRoutine, get_enabled_phase_names_from_config, core_task_success_util +from augur.tasks.util.collection_util import CollectionRequest,AugurTaskRoutine, get_enabled_phase_names_from_config_session, core_task_success_util from augur.tasks.start_tasks import prelim_phase, primary_repo_collect_phase from augur.tasks.github.util.util import get_repo_weight_by_issue @@ -33,70 +33,71 @@ def dei_track_repo(application: ClientApplication): if not (dei_id and level and repo_url): return jsonify({"status": "Missing argument"}), 400 - + repo_url = repo_url.lower() - - session = DatabaseSession(logger, engine=current_app.engine) - session.autocommit = True - repo: Repo = session.query(Repo).filter(Repo.repo_git==repo_url).first() - if repo: - # Making the assumption that only new repos will be added with this endpoint - return jsonify({"status": "Repo already exists"}) - - frontend_repo_group: RepoGroup = session.query(RepoGroup).filter(RepoGroup.rg_name == FRONTEND_REPO_GROUP_NAME).first() - repo_id = Repo.insert_github_repo(session, repo_url, frontend_repo_group.repo_group_id, "API.DEI", repo_type="") - if not repo_id: - return jsonify({"status": "Error adding repo"}) - - repo = Repo.get_by_id(session, repo_id) - repo_git = repo.repo_git - pr_issue_count = get_repo_weight_by_issue(logger, repo_git) - - record = { - "repo_id": repo_id, - "issue_pr_sum": pr_issue_count, - "core_weight": -9223372036854775808, - "secondary_weight": -9223372036854775808, - "ml_weight": -9223372036854775808 - } - - collection_status_unique = ["repo_id"] - session.insert_data(record, CollectionStatus, collection_status_unique, on_conflict_update=False) - - record = { - "badging_id": dei_id, - "level": level, - "repo_id": repo_id - } - - 
enabled_phase_names = get_enabled_phase_names_from_config_session(session, logger) - - #Primary collection hook. - primary_enabled_phases = [] - - #Primary jobs - if prelim_phase.__name__ in enabled_phase_names: - primary_enabled_phases.append(prelim_phase) - - primary_enabled_phases.append(primary_repo_collect_phase) - - #task success is scheduled no matter what the config says. - def core_task_success_util_gen(repo_git): - return core_task_success_util.si(repo_git) - - primary_enabled_phases.append(core_task_success_util_gen) - - record = BadgingDEI(**record) - session.add(record) - - deiHook = CollectionRequest("core",primary_enabled_phases) - deiHook.repo_list = [repo_url] - - singleRoutine = AugurTaskRoutine(logger, session,[deiHook]) - singleRoutine.start_data_collection() - #start_block_of_repos(logger, session, [repo_url], primary_enabled_phases, "new") - - session.close() + + # Use context manager to ensure proper session cleanup + with DatabaseSession(logger, engine=current_app.engine) as session: + repo: Repo = session.query(Repo).filter(Repo.repo_git==repo_url).first() + if repo: + # Making the assumption that only new repos will be added with this endpoint + return jsonify({"status": "Repo already exists"}) + + frontend_repo_group: RepoGroup = session.query(RepoGroup).filter(RepoGroup.rg_name == FRONTEND_REPO_GROUP_NAME).first() + repo_id = Repo.insert_github_repo(session, repo_url, frontend_repo_group.repo_group_id, "API.DEI", repo_type="") + if not repo_id: + return jsonify({"status": "Error adding repo"}) + + repo = Repo.get_by_id(session, repo_id) + repo_git = repo.repo_git + pr_issue_count = get_repo_weight_by_issue(logger, repo_git) + + record = { + "repo_id": repo_id, + "issue_pr_sum": pr_issue_count, + "core_weight": -9223372036854775808, + "secondary_weight": -9223372036854775808, + "ml_weight": -9223372036854775808 + } + + collection_status_unique = ["repo_id"] + session.insert_data(record, CollectionStatus, collection_status_unique, on_conflict_update=False) + + record = { + "badging_id": dei_id, + "level": level, + "repo_id": repo_id + } + + enabled_phase_names = get_enabled_phase_names_from_config_session(session, logger) + + # Primary collection hook. + primary_enabled_phases = [] + + # Primary jobs + if prelim_phase.__name__ in enabled_phase_names: + primary_enabled_phases.append(prelim_phase) + + primary_enabled_phases.append(primary_repo_collect_phase) + + #task success is scheduled no matter what the config says. 
+ def core_task_success_util_gen(repo_git): + return core_task_success_util.si(repo_git) + + primary_enabled_phases.append(core_task_success_util_gen) + + record = BadgingDEI(**record) + session.add(record) + + # Explicitly commit the session to persist BadgingDEI record + session.commit() + + deiHook = CollectionRequest("core",primary_enabled_phases) + deiHook.repo_list = [repo_url] + + singleRoutine = AugurTaskRoutine(logger, session,[deiHook]) + singleRoutine.start_data_collection() + #start_block_of_repos(logger, session, [repo_url], primary_enabled_phases, "new") return jsonify({"status": "Success"}) @@ -108,25 +109,31 @@ def dei_report(application: ClientApplication): if not dei_id: return jsonify({"status": "Missing argument"}), 400 - - session = DatabaseSession(logger, engine=current_app.engine) - project: BadgingDEI = session.query(BadgingDEI).filter(BadgingDEI.badging_id==dei_id).first() + # Use context manager but scope it carefully to cover lazy-loading + with DatabaseSession(logger, engine=current_app.engine) as session: + project: BadgingDEI = session.query(BadgingDEI).filter(BadgingDEI.badging_id==dei_id).first() + + if not project: + return jsonify({"status": "Invalid ID"}) + + # Render template while session is still open (accesses project.repo via lazy-loading) + md = render_template("dei-badging-report.j2", project=project) - if not project: - return jsonify({"status": "Invalid ID"}) - - md = render_template("dei-badging-report.j2", project=project) + # Store project.id before session closes + project_id = project.id + + # Session is now closed - proceed with file operations (no database access needed) cachePath = Path.cwd() / "augur" / "static" / "cache" - source = cachePath / f"{project.id}_badging_report.md" - report = cachePath / f"{project.id}_badging_report.pdf" + source = cachePath / f"{project_id}_badging_report.md" + report = cachePath / f"{project_id}_badging_report.pdf" source.write_text(md) command = f"mdpdf -o {str(report.resolve())} {str(source.resolve())}" converter = subprocess.Popen(command.split()) converter.wait() - + # TODO what goes in the report? 
return send_file(report.resolve()) \ No newline at end of file diff --git a/augur/application/cli/user.py b/augur/application/cli/user.py index 2cae5d7b22..3787708252 100644 --- a/augur/application/cli/user.py +++ b/augur/application/cli/user.py @@ -38,41 +38,44 @@ def add_user(username, email, firstname, lastname, admin, phone_number, password """Add a new user to the database with email address = EMAIL.""" session = Session() + try: + if session.query(User).filter(User.login_name == username).first() is not None: + return click.echo("username already taken") - if session.query(User).filter(User.login_name == username).first() is not None: - return click.echo("username already taken") + if session.query(User).filter(User.email == email).first() is not None: + return click.echo("email already signed-up") - if session.query(User).filter(User.email == email).first() is not None: - return click.echo("email already signed-up") - - user = session.query(User).filter(User.login_name == username).first() - if not user: - password = User.compute_hashsed_password(password) - new_user = User(login_name=username, login_hashword=password, email=email, text_phone=phone_number, first_name=firstname, last_name=lastname, admin=admin, tool_source="User CLI", tool_version=None, data_source="CLI") - session.add(new_user) - session.commit() - user_type = "admin user" if admin else "user" - message = f"Successfully added new: {username}" - click.secho(message, bold=True) + user = session.query(User).filter(User.login_name == username).first() + if not user: + password = User.compute_hashsed_password(password) + new_user = User(login_name=username, login_hashword=password, email=email, text_phone=phone_number, first_name=firstname, last_name=lastname, admin=admin, tool_source="User CLI", tool_version=None, data_source="CLI") + session.add(new_user) + session.commit() + user_type = "admin user" if admin else "user" + message = f"Successfully added new: {username}" + click.secho(message, bold=True) + return 0 + finally: session.close() engine.dispose() - - return 0 @cli.command('password_reset', short_help="Reset a user's password") @click.argument("username") @click.password_option(help="New password") def reset_password(username, password): session = Session() + try: + user = session.query(User).filter(User.login_name == username).first() - user = session.query(User).filter(User.login_name == username).first() + if not user: + return click.echo("invalid username") - if not user: - return click.echo("invalid username") - - password = User.compute_hashsed_password(password) - user.login_hashword = password - session.commit() + password = User.compute_hashsed_password(password) + user.login_hashword = password + session.commit() - return click.echo("Password updated") \ No newline at end of file + return click.echo("Password updated") + finally: + session.close() + engine.dispose() \ No newline at end of file From 7aefd1bef36fbf41775229969065674f92aa3e26 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 19 Nov 2025 12:03:55 -0500 Subject: [PATCH 035/104] Fix deadlock issues by implementing timeout handling for git operations Signed-off-by: Shlok Gilda --- .../facade_worker/facade_worker/repofetch.py | 204 ++++++++++++++---- .../facade_worker/utilitymethods.py | 21 +- 2 files changed, 178 insertions(+), 47 deletions(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py index f754f4e098..18854c00bd 100644 --- 
a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -149,7 +149,18 @@ def git_repo_initialize(facade_helper, session, repo_git): facade_helper.log_activity('Verbose', f"Cloning: {git}") cmd = f"git -C {repo_path} clone '{git}' {repo_name}" - return_code = subprocess.Popen([cmd], shell=True).wait() + try: + result = subprocess.run( + cmd, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=7200, # 2 hours for large repos + check=False + ) + return_code = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git clone timed out: {cmd}') + return_code = -1 # Timeout error code if (return_code == 0): # If cloning succeeded, repo is ready for analysis @@ -317,8 +328,18 @@ def git_repo_updates(facade_helper, repo_git): firstpull = (f"git -C {absolute_path} pull") - return_code_remote = subprocess.Popen( - [firstpull], shell=True).wait() + try: + result = subprocess.run( + firstpull, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git pull + check=False + ) + return_code_remote = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {firstpull}') + return_code_remote = -1 # Timeout error code facade_helper.log_activity('Verbose', 'Got to here. 1.') @@ -334,13 +355,20 @@ def git_repo_updates(facade_helper, repo_git): getremotedefault = ( f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'") - return_code_remote = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).wait() - - remotedefault = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).communicate()[0] - - remotedefault = remotedefault.decode() + try: + result = subprocess.run( + getremotedefault, shell=True, + capture_output=True, + encoding='utf-8', errors='replace', + timeout=60, # 1 minute for remote query + check=False + ) + return_code_remote = result.returncode + remotedefault = result.stdout.strip() + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') + return_code_remote = -1 + remotedefault = '' facade_helper.log_activity( 'Verbose', f'remote default getting checked out is: {remotedefault}.') @@ -351,14 +379,35 @@ def git_repo_updates(facade_helper, repo_git): facade_helper.log_activity( 'Verbose', f"get remote default command is: \n \n {getremotedefault} \n \n ") - return_code_remote_default_again = subprocess.Popen( - [getremotedefault], shell=True).wait() + try: + result = subprocess.run( + getremotedefault, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git checkout + check=False + ) + return_code_remote_default_again = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') + return_code_remote_default_again = -1 # Timeout error code if return_code_remote_default_again == 0: facade_helper.log_activity('Verbose', "local checkout worked.") cmd = (f"git -C {absolute_path} pull") - return_code = subprocess.Popen([cmd], shell=True).wait() + try: + result = subprocess.run( + cmd, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git pull + check=False + ) + return_code = result.returncode + except subprocess.TimeoutExpired: + 
facade_helper.log_activity('Error', f'Git operation timed out: {cmd}') + return_code = -1 # Timeout error code except Exception as e: facade_helper.log_activity( @@ -369,7 +418,18 @@ def git_repo_updates(facade_helper, repo_git): cmd = (f"git -C {absolute_path} pull") - return_code = subprocess.Popen([cmd], shell=True).wait() + try: + result = subprocess.run( + cmd, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git pull + check=False + ) + return_code = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {cmd}') + return_code = -1 # Timeout error code # If the attempt succeeded, then don't try any further fixes. If # the attempt to fix things failed, give up and try next time. @@ -392,37 +452,58 @@ def git_repo_updates(facade_helper, repo_git): getremotedefault = ( f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'") - return_code_remote = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).wait() - - remotedefault = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).communicate()[0] - - remotedefault = remotedefault.decode() + try: + result = subprocess.run( + getremotedefault, shell=True, + capture_output=True, + encoding='utf-8', errors='replace', + timeout=60, # 1 minute for remote query + check=False + ) + return_code_remote = result.returncode + remotedefault = result.stdout.strip() + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') + return_code_remote = -1 + remotedefault = '' try: getremotedefault = ( f"git -C {absolute_path} checkout {remotedefault}") - return_code_remote_default = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).wait() - - return_message_getremotedefault = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).communicate()[0] + try: + result = subprocess.run( + getremotedefault, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git checkout + check=False + ) + return_code_remote_default = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') + return_code_remote_default = -1 # Timeout error code facade_helper.log_activity( - 'Verbose', f'get remote default result: {return_message_getremotedefault}') + 'Verbose', f'get remote default result (return code): {return_code_remote_default}') getcurrentbranch = (f"git -C {absolute_path} branch") - return_code_local = subprocess.Popen( - [getcurrentbranch], stdout=subprocess.PIPE, shell=True).wait() - - localdefault = subprocess.Popen( - [getcurrentbranch], stdout=subprocess.PIPE, shell=True).communicate()[0] - - localdefault = localdefault.decode() + try: + result = subprocess.run( + getcurrentbranch, shell=True, + capture_output=True, + encoding='utf-8', errors='replace', + timeout=60, # 1 minute for branch query + check=False + ) + return_code_local = result.returncode + localdefault = result.stdout + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {getcurrentbranch}') + return_code_local = -1 + localdefault = '' facade_helper.log_activity( 'Verbose', f'remote default is: {remotedefault}, and localdefault is {localdefault}.') @@ -430,20 +511,50 @@ def git_repo_updates(facade_helper, repo_git): cmd_checkout_default = ( 
f"git -C {absolute_path} checkout {remotedefault}") - cmd_checkout_default_wait = subprocess.Popen( - [cmd_checkout_default], shell=True).wait() + try: + result = subprocess.run( + cmd_checkout_default, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git checkout + check=False + ) + cmd_checkout_default_wait = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {cmd_checkout_default}') + cmd_checkout_default_wait = -1 cmdpull2 = (f"git -C {absolute_path} pull") cmd_reset = (f"git -C {absolute_path} reset --hard origin/{remotedefault}") - cmd_reset_wait = subprocess.Popen( - [cmd_reset], shell=True).wait() + try: + result = subprocess.run( + cmd_reset, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=300, # 5 minutes for git reset + check=False + ) + cmd_reset_wait = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {cmd_reset}') + cmd_reset_wait = -1 cmd_clean = (f"git -C {absolute_path} clean -df") - return_code_clean = subprocess.Popen( - [cmd_clean], shell=True).wait() + try: + result = subprocess.run( + cmd_clean, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=300, # 5 minutes for git clean + check=False + ) + return_code_clean = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {cmd_clean}') + return_code_clean = -1 except Exception as e: @@ -453,7 +564,18 @@ def git_repo_updates(facade_helper, repo_git): cmdpull2 = (f"git -C {absolute_path} pull") print(cmdpull2) - return_code = subprocess.Popen([cmdpull2], shell=True).wait() + try: + result = subprocess.run( + cmdpull2, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git pull + check=False + ) + return_code = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {cmdpull2}') + return_code = -1 # Timeout error code attempt += 1 diff --git a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py index c06614ac7d..6df720584c 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py @@ -105,13 +105,22 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name): return f"{repo_base_dir}{repo_id}-{repo_path}/{repo_name}" -def get_parent_commits_set(absolute_repo_path): - - parents = subprocess.Popen(["git --git-dir %s log --ignore-missing " - "--pretty=format:'%%H'" % (absolute_repo_path)], - stdout=subprocess.PIPE, shell=True) +def get_parent_commits_set(absolute_repo_path, logger=None): - parent_commits = set(parents.stdout.read().decode("utf-8",errors="ignore").split(os.linesep)) + cmd = "git --git-dir %s log --ignore-missing --pretty=format:'%%H'" % (absolute_repo_path) + try: + result = subprocess.run( + cmd, shell=True, + capture_output=True, + encoding='utf-8', errors='replace', # Handle non-UTF-8 gracefully + timeout=600, # 10 minutes for git log + check=False + ) + parent_commits = set(result.stdout.split(os.linesep)) + except subprocess.TimeoutExpired: + if logger: + logger.error(f"Git log timed out for repo: {absolute_repo_path}") + parent_commits = set() # Return empty set on timeout # If there are no 
commits in the range, we still get a blank entry in # the set. Remove it, as it messes with the calculations From 0d487afd9774aa495b499248fc334b23266fc435 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:05:25 -0500 Subject: [PATCH 036/104] fix: Use list.clear() in facade tasks to reduce memory overhead Signed-off-by: Shlok Gilda --- augur/tasks/git/facade_tasks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index 5baaed20d4..d5ead38d14 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -212,7 +212,7 @@ def facade_fetch_missing_commit_messages(repo_git): if len(to_insert) >= 1000: bulk_insert_dicts(logger,to_insert, CommitMessage, ["repo_id","cmt_hash"]) - to_insert = [] + to_insert.clear() to_insert.append(msg_record) except Exception as e: @@ -313,13 +313,14 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: ) if pendingCommitRecordsToInsert: facade_bulk_insert_commits(logger, pendingCommitRecordsToInsert) - pendingCommitRecordsToInsert = [] + pendingCommitRecordsToInsert.clear() if commit_msg: pendingCommitMessageRecordsToInsert.append(commit_msg) if len(pendingCommitMessageRecordsToInsert) >= 1000: bulk_insert_dicts(logger, pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id", "cmt_hash"]) + pendingCommitMessageRecordsToInsert.clear() # FINAL MESSAGE INSERT bulk_insert_dicts(logger, pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id", "cmt_hash"]) From 19d0a9b37c00cff9ca9d89c01c9d3a8f32db39b4 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:05:46 -0500 Subject: [PATCH 037/104] fix: Process facade contributor results in batches Signed-off-by: Shlok Gilda --- augur/tasks/github/facade_github/tasks.py | 33 +++++++++++++++++++---- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index eff64df6ee..3396de7b64 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -252,7 +252,6 @@ def insert_facade_contributors(self, repo_git): #Execute statement with session. 
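Patch 036 above flushes a single buffer every 1000 records and empties it with `.clear()` rather than rebinding a fresh list. A rough sketch of that buffered-flush pattern, with a stand-in `bulk_insert` callable in place of `bulk_insert_dicts`:

from typing import Callable, Iterable

BATCH_SIZE = 1000  # flush threshold used throughout these patches

def buffered_insert(records: Iterable[dict], bulk_insert: Callable[[list], None]) -> int:
    """Accumulate records in one reusable buffer and flush every BATCH_SIZE rows."""
    buffer = []
    flushed = 0
    for record in records:
        buffer.append(record)
        if len(buffer) >= BATCH_SIZE:
            bulk_insert(buffer)
            flushed += len(buffer)
            buffer.clear()   # empty the same list in place instead of rebinding `buffer = []`
    if buffer:               # final partial batch
        bulk_insert(buffer)
        flushed += len(buffer)
    return flushed

# Example: buffered_insert(({"id": i} for i in range(2500)), bulk_insert=lambda rows: None)

Calling `.clear()` keeps one buffer object alive for the whole loop, which is the memory-overhead point the commit message is making.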
     result = execute_sql(new_contrib_sql)
 
-    new_contribs = [dict(row) for row in result.mappings()]
 
     #print(new_contribs)
 
@@ -262,7 +261,20 @@ def insert_facade_contributors(self, repo_git):
 
     key_auth = GithubRandomKeyAuth(logger)
 
-    process_commit_metadata(logger, key_auth, list(new_contribs), repo_id, platform_id)
+    # Process results in batches to reduce memory usage
+    batch = []
+    BATCH_SIZE = 1000
+
+    for row in result.mappings():
+        batch.append(dict(row))
+
+        if len(batch) >= BATCH_SIZE:
+            process_commit_metadata(logger, key_auth, batch, repo_id, platform_id)
+            batch.clear()
+
+    # Process remaining items in batch
+    if batch:
+        process_commit_metadata(logger, key_auth, batch, repo_id, platform_id)
 
     logger.debug("DEBUG: Got through the new_contribs")
 
@@ -300,10 +312,21 @@ def insert_facade_contributors(self, repo_git):
 
     result = execute_sql(resolve_email_to_cntrb_id_sql)
 
-    existing_cntrb_emails = [dict(row) for row in result.mappings()]
 
-    print(existing_cntrb_emails)
-    link_commits_to_contributor(logger, facade_helper,list(existing_cntrb_emails))
+    # Process results in batches to reduce memory usage
+    batch = []
+    BATCH_SIZE = 1000
+
+    for row in result.mappings():
+        batch.append(dict(row))
+
+        if len(batch) >= BATCH_SIZE:
+            link_commits_to_contributor(logger, facade_helper, batch)
+            batch.clear()
+
+    # Process remaining items in batch
+    if batch:
+        link_commits_to_contributor(logger, facade_helper, batch)
 
     return
 
From 05165f10838423b244b99c9c7edb4d1589ab445c Mon Sep 17 00:00:00 2001
From: Shlok Gilda
Date: Thu, 20 Nov 2025 11:06:26 -0500
Subject: [PATCH 038/104] fix: Convert issues collection to generator pattern
 with batching

Signed-off-by: Shlok Gilda
---
 augur/tasks/github/issues.py | 68 ++++++++++++++++++++++++++++--------
 1 file changed, 54 insertions(+), 14 deletions(-)

diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py
index 37bee5c8dd..d100d511bc 100644
--- a/augur/tasks/github/issues.py
+++ b/augur/tasks/github/issues.py
@@ -1,6 +1,6 @@
 import logging
 import traceback
-from datetime import timedelta, timezone
+from datetime import timedelta, timezone, datetime
 
 from sqlalchemy.exc import IntegrityError
 
@@ -20,9 +20,21 @@
 development = get_development_flag()
 
 @celery.task(base=AugurCoreRepoCollectionTask)
-def collect_issues(repo_git : str, full_collection: bool) -> int:
+def collect_issues(repo_git: str, full_collection: bool) -> int:
+    """
+    Collect all issues (excluding pull requests) for a repository.
 
-    logger = logging.getLogger(collect_issues.__name__)
+    Retrieves issues from GitHub API in batches of 1000 and inserts them along with
+    related labels, assignees, and contributors.
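Patch 037 applies the same idea to SQL results: rows are pulled from the cursor and handed off in fixed-size chunks instead of being materialized as one list. A hedged sketch against a generic SQLAlchemy engine; the `engine`, `query_text`, and `process_batch` names are assumptions for illustration, not Augur APIs:

import sqlalchemy as s

def process_rows_in_batches(engine, query_text: str, process_batch, batch_size: int = 1000) -> None:
    """Stream rows from a SQL query and hand them to process_batch in fixed-size chunks."""
    with engine.connect() as connection:
        result = connection.execute(s.text(query_text))
        batch = []
        for row in result.mappings():   # RowMapping objects behave like dicts
            batch.append(dict(row))
            if len(batch) >= batch_size:
                process_batch(batch)
                batch.clear()
        if batch:                       # remaining rows after the loop
            process_batch(batch)

When `process_batch` makes slow external calls, as the GitHub lookups here do, pulling the rows up front (for example with `result.mappings().fetchall()`) releases the cursor sooner; a later patch in this series makes exactly that trade-off.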
+ + Args: + repo_git: Full git URL (e.g., 'https://github.com/chaoss/augur') + full_collection: True for all historical data, False for incremental (last collection - 2 days) + + Returns: + Number of issues collected, or -1 on error + """ + logger = logging.getLogger(collect_issues.__name__) repo_id = get_repo_by_repo_git(repo_git).repo_id @@ -31,33 +43,60 @@ def collect_issues(repo_git : str, full_collection: bool) -> int: if full_collection: core_data_last_collected = None else: - # subtract 2 days to ensure all data is collected + # Subtract 2 days to ensure all data is collected core_data_last_collected = (get_core_data_last_collected(repo_id) - timedelta(days=2)).replace(tzinfo=timezone.utc) key_auth = GithubRandomKeyAuth(logger) logger.info(f'this is the manifest.key_auth value: {str(key_auth)}') - try: - issue_data = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) + try: + issue_data_generator = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) - if not issue_data: - logger.info(f"{owner}/{repo} has no issues") - return 0 + # Process issues in batches to avoid memory spikes + batch = [] + total_issues = 0 + batch_size = 1000 + + for issue in issue_data_generator: + batch.append(issue) - total_issues = len(issue_data) - process_issues(issue_data, f"{owner}/{repo}: Issue task", repo_id, logger) + if len(batch) >= batch_size: + logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues})") + process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) + total_issues += len(batch) + batch.clear() + + # Process remaining issues in the last batch + if len(batch) > 0: + logger.info(f"{owner}/{repo}: Processing final batch of {len(batch)} issues") + process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) + total_issues += len(batch) + + if total_issues == 0: + logger.info(f"{owner}/{repo} has no issues") return total_issues - + except Exception as e: logger.error(f"Could not collect issues for repo {repo_git}\n Reason: {e} \n Traceback: {''.join(traceback.format_exception(None, e, e.__traceback__))}") return -1 -def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: +def retrieve_all_issue_data(repo_git: str, logger:logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): + """ + Retrieve all issue data for a repository as a generator. + + Returns a generator to avoid materializing all issues in memory at once. + This is critical for repos with 10,000+ issues to prevent memory spikes. + Args: + repo_git (str): The GitHub repository in "owner/repo" format. + logger (logging.Logger): Logger for logging messages. + key_auth (GithubRandomKeyAuth): Auth handler for GitHub API. + since (datetime, optional): Only issues updated since this datetime will be retrieved. 
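The key change in this patch is that the paginator is consumed lazily rather than via `list(issues_paginator)`. A small illustration of the difference, using a hypothetical `paginate_resource` generator in place of the GitHub client:

from typing import Iterator

def paginate_resource(total: int) -> Iterator[dict]:
    """Hypothetical stand-in for GithubDataAccess.paginate_resource: yields items lazily."""
    for number in range(total):
        yield {"number": number}

# Eager: builds a 50,000-element list before any processing starts.
eager = list(paginate_resource(50_000))

# Lazy: nothing is produced until the caller iterates; peak memory stays near one batch.
lazy = paginate_resource(50_000)
first_batch = [next(lazy) for _ in range(1000)]

print(len(eager), len(first_batch))  # 50000 1000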
+ """ owner, repo = get_owner_repo(repo_git) logger.info(f"Collecting issues for {owner}/{repo}") @@ -74,7 +113,8 @@ def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: issues_paginator = github_data_access.paginate_resource(url) - return list(issues_paginator) + # Return the generator directly instead of materializing it + return issues_paginator def process_issues(issues, task_name, repo_id, logger) -> None: From 40f9fab2ee1228db3aa65f0191e2c8541def9dd8 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:06:50 -0500 Subject: [PATCH 039/104] fix: Add batch processing to PR commits and files collection Signed-off-by: Shlok Gilda --- .../pull_requests/commits_model/core.py | 14 +++++++--- .../github/pull_requests/files_model/core.py | 14 +++++++--- augur/tasks/github/pull_requests/tasks.py | 27 ++++++------------- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index 2df6d66f5d..83b283bb6d 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -43,13 +43,15 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti logger.info(f"Getting pull request commits for repo: {repo.repo_git}") github_data_access = GithubDataAccess(key_auth, logger) - + + BATCH_SIZE = 1000 + pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] all_data = [] for index,pr_info in enumerate(pr_urls): logger.info(f'{task_name}: Querying commits for pull request #{index + 1} of {len(pr_urls)}') commits_url = pr_info['pr_url'] + '/commits?state=all' - + if not pr_info.get('pr_url'): logger.warning(f"{task_name}: No pr_url found for pull request info: {pr_info}. Skipping.") continue @@ -70,13 +72,17 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti 'repo_id': repo.repo_id, } all_data.append(pr_commit_row) + + if len(all_data) >= BATCH_SIZE: + logger.info(f"{task_name}: Inserting {len(all_data)} rows") + augur_db.insert_data(all_data,PullRequestCommit,pr_commits_natural_keys) + all_data.clear() except UrlNotFoundException: logger.info(f"{task_name}: PR with url of {pr_info['pr_url']} returned 404 on commit data. 
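The loop above issues one commits request per pull request and skips PRs whose URL has since gone missing. A standalone sketch of that guard; `fetch_json` and `NotFoundError` are stand-ins for the worker's data-access helper and `UrlNotFoundException`:

class NotFoundError(Exception):
    """Raised by the hypothetical fetch helper when the API returns 404."""

def fetch_json(url: str) -> list[dict]:
    # Stand-in for the GitHub client; a real implementation would raise
    # NotFoundError on a 404 response. Here it always raises, to exercise the guard.
    raise NotFoundError(url)

def collect_pr_commits(pr_urls: list[dict], logger=print) -> list[dict]:
    rows = []
    for pr_info in pr_urls:
        url = pr_info.get("pr_url")
        if not url:                      # defensive check mirrored from the patch
            logger(f"No pr_url found for {pr_info}. Skipping.")
            continue
        try:
            rows.extend(fetch_json(url + "/commits?state=all"))
        except NotFoundError:            # PR vanished between listing and fetching
            logger(f"PR at {url} returned 404 on commit data. Skipping.")
            continue
    return rows

print(len(collect_pr_commits([{"pr_url": "https://api.github.com/repos/chaoss/augur/pulls/1"}])))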
Skipping.") continue - + if len(all_data) > 0: logger.info(f"{task_name}: Inserting {len(all_data)} rows") - pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] augur_db.insert_data(all_data,PullRequestCommit,pr_commits_natural_keys) diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index cbecb44d6d..60222a3bc1 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -40,12 +40,14 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) + BATCH_SIZE = 1000 + pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] pr_file_rows = [] logger.info(f"Getting pull request files for repo: {repo.repo_git}") for index, pr_info in enumerate(pr_numbers): logger.info(f'Querying files for pull request #{index + 1} of {len(pr_numbers)}') - + query = """ query($repo: String!, $owner: String!,$pr_number: Int!, $numRecords: Int!, $cursor: String) { repository(name: $repo, owner: $owner) { @@ -68,7 +70,7 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection } } """ - + values = ["repository", "pullRequest", "files"] params = { 'owner': owner, @@ -92,6 +94,11 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection } pr_file_rows.append(data) + + if len(pr_file_rows) >= BATCH_SIZE: + logger.info(f"{task_name}: Inserting {len(pr_file_rows)} rows") + augur_db.insert_data(pr_file_rows, PullRequestFile, pr_file_natural_keys) + pr_file_rows.clear() except NotFoundException as e: logger.info(f"{task_name}: PR with number of {pr_info['pr_src_number']} returned 404 on file data. 
Skipping.") continue @@ -101,6 +108,5 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection if len(pr_file_rows) > 0: - # Execute a bulk upsert with sqlalchemy - pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] + logger.info(f"{task_name}: Inserting {len(pr_file_rows)} rows") augur_db.insert_data(pr_file_rows, PullRequestFile, pr_file_natural_keys) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 812a4eef25..88cb5afe21 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -381,6 +381,7 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: logger.debug(f"{owner}/{repo} No pr reviews for repo") return + # Process contributors (all_pr_reviews already in memory, so no OOM risk) contributors = [] for pull_request_id, reviews in all_pr_reviews.items(): @@ -389,32 +390,20 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: if contributor: contributors.append(contributor) - logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") - augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) + logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") + augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) + # Process pr reviews (all_pr_reviews already in memory, so no OOM risk) pr_reviews = [] for pull_request_id, reviews in all_pr_reviews.items(): for review in reviews: - + if "cntrb_id" in review: pr_reviews.append(extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_source, tool_version)) - logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") - pr_review_natural_keys = ["pr_review_src_id",] - augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) - - - - - - - - - - - - - + logger.info(f"{owner}/{repo}: Inserting {len(pr_reviews)} pr reviews") + pr_review_natural_keys = ["pr_review_src_id",] + augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) From a2c1b78a4fda93a265e4f4c7da1b5f95d9da04d2 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 20 Nov 2025 11:42:45 -0600 Subject: [PATCH 040/104] Update augur/tasks/github/issues.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Sean P. Goggins --- augur/tasks/github/issues.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index d100d511bc..68cae4d30c 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -84,7 +84,7 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: -def retrieve_all_issue_data(repo_git: str, logger:logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): +def retrieve_all_issue_data(repo_git: str, logger: logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): """ Retrieve all issue data for a repository as a generator. From 11019b796b9368fe8185da649217bb6644f05a50 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 20 Nov 2025 11:43:41 -0600 Subject: [PATCH 041/104] Update augur/tasks/github/issues.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Sean P. 
Goggins --- augur/tasks/github/issues.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index 68cae4d30c..aaca35ed5f 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -62,7 +62,7 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: batch.append(issue) if len(batch) >= batch_size: - logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues})") + logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues + len(batch)})") process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) total_issues += len(batch) batch.clear() From 6365814fd1dae26819b796f0c466bc9cc46193a0 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 17:05:21 -0500 Subject: [PATCH 042/104] fix: Optimize database cursor usage by fetching results immediately in insert_facade_contributors Signed-off-by: Shlok Gilda --- augur/tasks/github/facade_github/tasks.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 3396de7b64..73fd9a51b5 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -253,6 +253,10 @@ def insert_facade_contributors(self, repo_git): #Execute statement with session. result = execute_sql(new_contrib_sql) + # Fetch all results immediately to close the database cursor/connection + # This prevents holding the connection open during GitHub API calls + rows = result.mappings().fetchall() + #print(new_contribs) #json.loads(pd.read_sql(new_contrib_sql, self.db, params={ @@ -265,7 +269,7 @@ def insert_facade_contributors(self, repo_git): batch = [] BATCH_SIZE = 1000 - for row in result.mappings(): + for row in rows: batch.append(dict(row)) if len(batch) >= BATCH_SIZE: @@ -313,11 +317,15 @@ def insert_facade_contributors(self, repo_git): result = execute_sql(resolve_email_to_cntrb_id_sql) + # Fetch all results immediately to close the database cursor/connection + # This prevents holding the connection open during database UPDATE operations + rows = result.mappings().fetchall() + # Process results in batches to reduce memory usage batch = [] BATCH_SIZE = 1000 - for row in result.mappings(): + for row in rows: batch.append(dict(row)) if len(batch) >= BATCH_SIZE: From f8f06a259b78f0d7396539df5a7cefd4154d6349 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 17:30:56 -0500 Subject: [PATCH 043/104] refactor git command execution to use unified timeout handling across facade operation Signed-off-by: Shlok Gilda --- augur/tasks/git/facade_tasks.py | 4 +- .../facade_worker/facade_worker/config.py | 46 ++++ .../facade_worker/facade_worker/repofetch.py | 240 ++++++------------ .../facade_worker/utilitymethods.py | 28 +- 4 files changed, 140 insertions(+), 178 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index 5baaed20d4..b0d638768f 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -121,7 +121,7 @@ def trim_commits_post_analysis_facade_task(repo_git): repo_loc = (f"{absolute_path}/.git") # Grab the parents of HEAD - parent_commits = get_parent_commits_set(repo_loc) + parent_commits = get_parent_commits_set(repo_loc, facade_helper) # Grab the existing commits from the database existing_commits = get_existing_commits_set(repo_id) @@ -244,7 
+244,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: repo_loc = (f"{absolute_path}/.git") # Grab the parents of HEAD - parent_commits = get_parent_commits_set(repo_loc) + parent_commits = get_parent_commits_set(repo_loc, facade_helper) # Grab the existing commits from the database existing_commits = get_existing_commits_set(repo_id) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index 21fe424d10..6f9cd2cc98 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -29,6 +29,7 @@ import json import logging import random +import subprocess from urllib.parse import urlparse import sqlalchemy as s from sqlalchemy.exc import OperationalError @@ -254,3 +255,48 @@ def insert_or_update_data(self, query, **bind_args)-> None: return def inc_repos_processed(self): self.repos_processed += 1 + + def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False, operation_description: str = None) -> tuple: + """ + Execute a git command with timeout handling. + + This method provides a unified interface for running git commands with + consistent timeout handling and error logging across all facade operations. + + Args: + cmd: The git command to execute + timeout: Timeout in seconds + capture_output: If True, capture stdout/stderr; if False, discard them + operation_description: Human-readable description for error logging + (defaults to cmd if not provided) + + Returns: + tuple: (return_code, stdout_content) + return_code is -1 on timeout + stdout_content is empty string if capture_output=False + """ + if operation_description is None: + operation_description = cmd + + try: + if capture_output: + result = subprocess.run( + cmd, shell=True, + capture_output=True, + encoding='utf-8', errors='replace', + timeout=timeout, + check=False + ) + return result.returncode, result.stdout.strip() + else: + result = subprocess.run( + cmd, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=timeout, + check=False + ) + return result.returncode, '' + except subprocess.TimeoutExpired: + self.log_activity('Error', f'Git operation timed out: {operation_description}') + return -1, '' diff --git a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py index 18854c00bd..6e911f6fd9 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -149,18 +149,12 @@ def git_repo_initialize(facade_helper, session, repo_git): facade_helper.log_activity('Verbose', f"Cloning: {git}") cmd = f"git -C {repo_path} clone '{git}' {repo_name}" - try: - result = subprocess.run( - cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=7200, # 2 hours for large repos - check=False - ) - return_code = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git clone timed out: {cmd}') - return_code = -1 # Timeout error code + return_code, _ = facade_helper.run_git_command( + cmd, + timeout=7200, # 2 hours for large repos + capture_output=False, + operation_description=f'git clone {git}' + ) if (return_code == 0): # If cloning succeeded, repo is ready for analysis @@ -328,18 +322,12 @@ def git_repo_updates(facade_helper, repo_git): firstpull = (f"git -C {absolute_path} pull") - try: - result = 
subprocess.run( - firstpull, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git pull - check=False - ) - return_code_remote = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {firstpull}') - return_code_remote = -1 # Timeout error code + return_code_remote, _ = facade_helper.run_git_command( + firstpull, + timeout=600, # 10 minutes for git pull + capture_output=False, + operation_description=f'git pull {repo.repo_git}' + ) facade_helper.log_activity('Verbose', 'Got to here. 1.') @@ -355,20 +343,12 @@ def git_repo_updates(facade_helper, repo_git): getremotedefault = ( f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'") - try: - result = subprocess.run( - getremotedefault, shell=True, - capture_output=True, - encoding='utf-8', errors='replace', - timeout=60, # 1 minute for remote query - check=False - ) - return_code_remote = result.returncode - remotedefault = result.stdout.strip() - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') - return_code_remote = -1 - remotedefault = '' + return_code_remote, remotedefault = facade_helper.run_git_command( + getremotedefault, + timeout=60, # 1 minute for remote query + capture_output=True, + operation_description='get remote default branch' + ) facade_helper.log_activity( 'Verbose', f'remote default getting checked out is: {remotedefault}.') @@ -379,35 +359,23 @@ def git_repo_updates(facade_helper, repo_git): facade_helper.log_activity( 'Verbose', f"get remote default command is: \n \n {getremotedefault} \n \n ") - try: - result = subprocess.run( - getremotedefault, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git checkout - check=False - ) - return_code_remote_default_again = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') - return_code_remote_default_again = -1 # Timeout error code + return_code_remote_default_again, _ = facade_helper.run_git_command( + getremotedefault, + timeout=600, # 10 minutes for git checkout + capture_output=False, + operation_description=f'git checkout {remotedefault}' + ) if return_code_remote_default_again == 0: facade_helper.log_activity('Verbose', "local checkout worked.") cmd = (f"git -C {absolute_path} pull") - try: - result = subprocess.run( - cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git pull - check=False - ) - return_code = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmd}') - return_code = -1 # Timeout error code + return_code, _ = facade_helper.run_git_command( + cmd, + timeout=600, # 10 minutes for git pull + capture_output=False, + operation_description=f'git pull {repo.repo_git}' + ) except Exception as e: facade_helper.log_activity( @@ -418,18 +386,12 @@ def git_repo_updates(facade_helper, repo_git): cmd = (f"git -C {absolute_path} pull") - try: - result = subprocess.run( - cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git pull - check=False - ) - return_code = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmd}') - return_code = -1 # Timeout error code + return_code, _ = 
facade_helper.run_git_command( + cmd, + timeout=600, # 10 minutes for git pull + capture_output=False, + operation_description=f'git pull {repo.repo_git}' + ) # If the attempt succeeded, then don't try any further fixes. If # the attempt to fix things failed, give up and try next time. @@ -452,58 +414,36 @@ def git_repo_updates(facade_helper, repo_git): getremotedefault = ( f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'") - try: - result = subprocess.run( - getremotedefault, shell=True, - capture_output=True, - encoding='utf-8', errors='replace', - timeout=60, # 1 minute for remote query - check=False - ) - return_code_remote = result.returncode - remotedefault = result.stdout.strip() - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') - return_code_remote = -1 - remotedefault = '' + return_code_remote, remotedefault = facade_helper.run_git_command( + getremotedefault, + timeout=60, # 1 minute for remote query + capture_output=True, + operation_description='get remote default branch' + ) try: getremotedefault = ( f"git -C {absolute_path} checkout {remotedefault}") - try: - result = subprocess.run( - getremotedefault, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git checkout - check=False - ) - return_code_remote_default = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') - return_code_remote_default = -1 # Timeout error code + return_code_remote_default, _ = facade_helper.run_git_command( + getremotedefault, + timeout=600, # 10 minutes for git checkout + capture_output=False, + operation_description=f'git checkout {remotedefault}' + ) facade_helper.log_activity( 'Verbose', f'get remote default result (return code): {return_code_remote_default}') getcurrentbranch = (f"git -C {absolute_path} branch") - try: - result = subprocess.run( - getcurrentbranch, shell=True, - capture_output=True, - encoding='utf-8', errors='replace', - timeout=60, # 1 minute for branch query - check=False - ) - return_code_local = result.returncode - localdefault = result.stdout - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {getcurrentbranch}') - return_code_local = -1 - localdefault = '' + return_code_local, localdefault = facade_helper.run_git_command( + getcurrentbranch, + timeout=60, # 1 minute for branch query + capture_output=True, + operation_description='get current branch' + ) facade_helper.log_activity( 'Verbose', f'remote default is: {remotedefault}, and localdefault is {localdefault}.') @@ -511,50 +451,32 @@ def git_repo_updates(facade_helper, repo_git): cmd_checkout_default = ( f"git -C {absolute_path} checkout {remotedefault}") - try: - result = subprocess.run( - cmd_checkout_default, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git checkout - check=False - ) - cmd_checkout_default_wait = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmd_checkout_default}') - cmd_checkout_default_wait = -1 + cmd_checkout_default_wait, _ = facade_helper.run_git_command( + cmd_checkout_default, + timeout=600, # 10 minutes for git checkout + capture_output=False, + operation_description=f'git checkout {remotedefault}' + ) cmdpull2 = (f"git -C {absolute_path} pull") cmd_reset = (f"git -C 
{absolute_path} reset --hard origin/{remotedefault}") - try: - result = subprocess.run( - cmd_reset, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=300, # 5 minutes for git reset - check=False - ) - cmd_reset_wait = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmd_reset}') - cmd_reset_wait = -1 + cmd_reset_wait, _ = facade_helper.run_git_command( + cmd_reset, + timeout=300, # 5 minutes for git reset + capture_output=False, + operation_description=f'git reset --hard origin/{remotedefault}' + ) cmd_clean = (f"git -C {absolute_path} clean -df") - try: - result = subprocess.run( - cmd_clean, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=300, # 5 minutes for git clean - check=False - ) - return_code_clean = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmd_clean}') - return_code_clean = -1 + return_code_clean, _ = facade_helper.run_git_command( + cmd_clean, + timeout=300, # 5 minutes for git clean + capture_output=False, + operation_description='git clean -df' + ) except Exception as e: @@ -564,18 +486,12 @@ def git_repo_updates(facade_helper, repo_git): cmdpull2 = (f"git -C {absolute_path} pull") print(cmdpull2) - try: - result = subprocess.run( - cmdpull2, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git pull - check=False - ) - return_code = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmdpull2}') - return_code = -1 # Timeout error code + return_code, _ = facade_helper.run_git_command( + cmdpull2, + timeout=600, # 10 minutes for git pull + capture_output=False, + operation_description=f'git pull {repo.repo_git}' + ) attempt += 1 diff --git a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py index 6df720584c..92546002ae 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py @@ -105,22 +105,22 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name): return f"{repo_base_dir}{repo_id}-{repo_path}/{repo_name}" -def get_parent_commits_set(absolute_repo_path, logger=None): +def get_parent_commits_set(absolute_repo_path, facade_helper, logger=None): cmd = "git --git-dir %s log --ignore-missing --pretty=format:'%%H'" % (absolute_repo_path) - try: - result = subprocess.run( - cmd, shell=True, - capture_output=True, - encoding='utf-8', errors='replace', # Handle non-UTF-8 gracefully - timeout=600, # 10 minutes for git log - check=False - ) - parent_commits = set(result.stdout.split(os.linesep)) - except subprocess.TimeoutExpired: - if logger: - logger.error(f"Git log timed out for repo: {absolute_repo_path}") - parent_commits = set() # Return empty set on timeout + + # Use facade_helper's unified git command runner + return_code, stdout = facade_helper.run_git_command( + cmd, + timeout=600, # 10 minutes for git log + capture_output=True, + operation_description=f'git log for {absolute_repo_path}' + ) + + if return_code == 0: + parent_commits = set(stdout.split(os.linesep)) + else: + parent_commits = set() # Return empty set on timeout or error # If there are no commits in the range, we still get a blank entry in # the set. 
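From patch 043 onward, every git invocation goes through `FacadeHelper.run_git_command`, which returns `(return_code, stdout)` and maps a timeout to `-1`. A simplified, standalone sketch of that wrapper and of the reworked `get_parent_commits_set` built on top of it; the `GitRunner` class here is an assumption for self-containment, while the method and function names follow the patch:

import os
import subprocess

class GitRunner:
    """Simplified stand-in for FacadeHelper's run_git_command wrapper."""

    def __init__(self, log=print):
        self.log = log

    def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False,
                        operation_description: str | None = None) -> tuple[int, str]:
        description = operation_description or cmd
        try:
            if capture_output:
                result = subprocess.run(cmd, shell=True, capture_output=True,
                                        encoding="utf-8", errors="replace",
                                        timeout=timeout, check=False)
                return result.returncode, result.stdout.strip()
            result = subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL,
                                    stderr=subprocess.DEVNULL, timeout=timeout, check=False)
            return result.returncode, ""
        except subprocess.TimeoutExpired:
            self.log(f"Git operation timed out: {description}")
            return -1, ""

def get_parent_commits_set(git_dir: str, runner: GitRunner) -> set[str]:
    """Collect the hashes reachable from HEAD, or an empty set on error/timeout."""
    cmd = f"git --git-dir {git_dir} log --ignore-missing --pretty=format:'%H'"
    return_code, stdout = runner.run_git_command(cmd, timeout=600, capture_output=True)
    parents = set(stdout.split(os.linesep)) if return_code == 0 else set()
    parents.discard("")   # an empty log still yields one blank entry; drop it
    return parents

Centralizing the timeout, logging, and output handling in one method is what lets the later patch collapse the repeated try/except blocks in repofetch.py into single calls.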
Remove it, as it messes with the calculations From 7bf42a3736d6d296990d0292708325e7adb4b023 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Mon, 1 Dec 2025 10:01:56 -0500 Subject: [PATCH 044/104] refactor subprocess.run calls in FacadeHelper to use common options Signed-off-by: Shlok Gilda --- .../facade_worker/facade_worker/config.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index 6f9cd2cc98..09f3c9d6ca 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -279,23 +279,28 @@ def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False, operation_description = cmd try: + # Common options for all subprocess.run calls + run_options = { + 'shell': True, + 'timeout': timeout, + 'check': False + } + + # Add capture_output-specific options + if capture_output: + run_options['capture_output'] = True + run_options['encoding'] = 'utf-8' + run_options['errors'] = 'replace' + else: + run_options['stdout'] = subprocess.DEVNULL + run_options['stderr'] = subprocess.DEVNULL + + result = subprocess.run(cmd, **run_options) + + # Return appropriate output based on capture_output flag if capture_output: - result = subprocess.run( - cmd, shell=True, - capture_output=True, - encoding='utf-8', errors='replace', - timeout=timeout, - check=False - ) return result.returncode, result.stdout.strip() else: - result = subprocess.run( - cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=timeout, - check=False - ) return result.returncode, '' except subprocess.TimeoutExpired: self.log_activity('Error', f'Git operation timed out: {operation_description}') From 1ccc8dd1e51ee2776bcda331d92725ce21da51ba Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 1 Dec 2025 15:29:49 -0500 Subject: [PATCH 045/104] Pylint and other style fixes Signed-off-by: Adrian Edwards --- augur/tasks/git/facade_tasks.py | 2 +- augur/tasks/github/facade_github/tasks.py | 1 - augur/tasks/github/issues.py | 2 +- augur/tasks/github/pull_requests/tasks.py | 34 ++++------------------- 4 files changed, 7 insertions(+), 32 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index d5ead38d14..8303aab5b1 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -255,7 +255,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: facade_helper.log_activity('Debug',f"Commits missing from repo {repo_id}: {len(missing_commits)}") - if not len(missing_commits) or repo_id is None: + if missing_commits or repo_id is None: #session.log_activity('Info','Type of missing_commits: %s' % type(missing_commits)) return diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 73fd9a51b5..53a3d6648a 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -5,7 +5,6 @@ from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from augur.application.db.models import Contributor from augur.tasks.github.facade_github.core import * from augur.application.db.lib import execute_sql, 
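Patch 045's pylint-driven change replaces `len()`-based truth tests with plain truthiness checks. The guard it touches in `analyze_commits_in_parallel` is an early return when nothing is missing, so the idiomatic equivalent of the removed `if not len(missing_commits) or repo_id is None:` keeps the negation. A tiny illustration of that emptiness check:

def should_skip(missing_commits: list, repo_id: int | None) -> bool:
    """Early-exit guard: skip when there are no missing commits or no repo id."""
    # Pylint-friendly emptiness test; dropping the `not` would invert the guard.
    return not missing_commits or repo_id is None

print(should_skip([], 5))          # True  -> nothing to analyze
print(should_skip(["abc123"], 5))  # False -> proceed with analysis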
get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors from augur.application.db.lib import get_session, execute_session_query diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index aaca35ed5f..91e56deaf7 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -12,7 +12,7 @@ from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from augur.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo from augur.tasks.util.worker_util import remove_duplicate_dicts -from augur.application.db.models import Issue, IssueLabel, IssueAssignee, Contributor +from augur.application.db.models import Issue, IssueLabel, IssueAssignee from augur.application.config import get_development_flag from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_core_data_last_collected, batch_insert_contributors diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 88cb5afe21..f18a656a98 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -11,12 +11,12 @@ from augur.application.db.models import PullRequest, Message, PullRequestReview, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, PullRequestReviewMessageRef, Contributor, Repo from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from augur.application.db.lib import get_session, get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors +from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors from augur.application.db.util import execute_session_query from ..messages import process_github_comment_contributors from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_core_data_last_collected -from typing import Generator, List, Dict +from typing import List platform_id = 1 @@ -52,15 +52,15 @@ def collect_pull_requests(repo_git: str, full_collection: bool) -> int: total_count += len(all_data) all_data.clear() - if len(all_data): + if all_data: process_pull_requests(all_data, f"{owner}/{repo}: Github Pr task", repo_id, logger, augur_db) total_count += len(all_data) if total_count > 0: - return total_count - else: logger.debug(f"{owner}/{repo} has no pull requests") return 0 + + return total_count @@ -182,30 +182,6 @@ def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): pr_metadata_natural_keys, string_fields=pr_metadata_string_fields) - - - - - - - - - - - - - - - - - - - - - - - - def process_pull_request_review_contributor(pr_review: dict, tool_source: str, tool_version: str, data_source: str): # get contributor data and set pr cntrb_id From aac134ea9acb078b0c1bab083fb346737f0aebcd Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 12:13:25 -0500 Subject: [PATCH 046/104] remove three files that are entirely comments Signed-off-by: Adrian Edwards --- augur/application/db/models/augur_data_old.py | 2803 ----------------- .../db/models/augur_operations_old.py | 123 - augur/application/db/models/spdx_old.py | 525 --- 3 files changed, 3451 deletions(-) delete mode 100644 augur/application/db/models/augur_data_old.py delete mode 100644 
augur/application/db/models/augur_operations_old.py delete mode 100644 augur/application/db/models/spdx_old.py diff --git a/augur/application/db/models/augur_data_old.py b/augur/application/db/models/augur_data_old.py deleted file mode 100644 index 5a71e4ede8..0000000000 --- a/augur/application/db/models/augur_data_old.py +++ /dev/null @@ -1,2803 +0,0 @@ -# from augur.application.db.models.base import Base -# from sqlalchemy import ( -# Column, -# Integer, -# String, -# UniqueConstraint, -# ForeignKey, -# Text, -# Boolean, -# BigInteger, -# SmallInteger, -# Index, -# Float, -# func, -# Date, -# text, -# Numeric, -# PrimaryKeyConstraint, -# CHAR, -# TIMESTAMP, -# JSON, -# ) -# from sqlalchemy.dialects.postgresql import JSONB -# from sqlalchemy.orm import relationship - -# # TODO: look at how facade queries it and add index - -# # TODO: look at how facade queries it and add index -# class AnalysisLog(Base): -# analysis_log_id = Column(BigInteger, primary_key=True) -# repos_id = Column(Integer, nullable=False) -# status = Column(String(), nullable=False) -# date_attempted = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) - -# # this is an insert always table so it does not need a UniqueConstraint -# __tablename__ = "analysis_log" -# __table_args__ = (Index("repos_id", repos_id), {"schema": "augur_data"}) - - -# # TODO: Manually filled by creation script -# # TODO: Could revive this table_ - - -# class ChaossMetricStatus(Base): -# cms_id = Column(BigInteger, primary_key=True, nullable=False) -# cm_group = Column(String()) -# cm_source = Column(String()) -# cm_type = Column(String()) -# cm_backend_status = Column(String()) -# cm_frontend_status = Column(String()) -# cm_defined = Column(Boolean()) -# cm_api_endpoint_repo = Column(String()) -# cm_api_endpoint_rg = Column(String()) -# cm_name = Column(String()) -# cm_working_group = Column(String()) -# cm_info = Column(JSON()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# cm_working_group_focus_area = Column(String()) - -# __tablename__ = "chaoss_metric_status" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "This table used to track CHAOSS Metric implementations in Augur, but due to the constantly changing location of that information, it is for the moment not actively populated. ", -# } - - -# class CommitCommentRef(Base): -# cmt_comment_id = Column(BigInteger, primary_key=True, nullable=False) -# cmt_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.commits.cmt_id", -# name="fk_commit_comment_ref_commits_1", -# onupdate="CASCADE", -# ondelete="RESTRICT", -# ), -# nullable=False, -# ) -# repo_id = Column(BigInteger) -# msg_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.message.msg_id", -# name="fk_commit_comment_ref_message_1", -# onupdate="CASCADE", -# ondelete="RESTRICT", -# ), -# nullable=False, -# ) -# user_id = Column(BigInteger, nullable=False) -# body = Column(Text()) -# line = Column(BigInteger) -# position = Column(BigInteger) -# commit_comment_src_node_id = Column( -# String(), -# comment="For data provenance, we store the source node ID if it exists. ", -# ) -# cmt_comment_src_id = Column( -# BigInteger, -# nullable=False, -# comment="For data provenance, we store the source ID if it exists. 
", -# ) -# created_at = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# message = relationship("Message", back_populates="commit") -# commit = relationship("Commits", back_populates="msg_ref") - -# __tablename__ = "commit_comment_ref" -# __table_args__ = ( -# Index("comment_id", cmt_comment_src_id, cmt_comment_id, msg_id), -# # unique value for insertion -# UniqueConstraint("cmt_comment_src_id", name="commitcomment"), -# {"schema": "augur_data"}, -# ) - - -# # TODO: This table does not get used so remove it and test without - - -# class CommitParents(Base): -# cmt_id = Column( -# BigInteger, -# ForeignKey("augur_data.commits.cmt_id", name="fk_commit_parents_commits_1"), -# primary_key=True, -# ) -# parent_id = Column( -# BigInteger, -# ForeignKey("augur_data.commits.cmt_id", name="fk_commit_parents_commits_2"), -# primary_key=True, -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "commit_parents" -# __table_args__ = ( -# Index("commit_parents_ibfk_1", cmt_id), -# Index("commit_parents_ibfk_2", parent_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Add foriegn key: cmt_author_platform_username = Column(String(), ForeignKey('augur_data.contributors.cntrb_login', name='fk_commits_contributors_3', ondelete="CASCADE", onupdate="CASCADE")) -# # TODO: Add relationship with this foreign key -# class Commits(Base): -# cmt_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_commits_repo_2", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# cmt_commit_hash = Column(String(), nullable=False) -# cmt_author_name = Column(String(), nullable=False) -# cmt_author_raw_email = Column(String(), nullable=False) -# cmt_author_email = Column(String(), nullable=False) -# cmt_author_date = Column(String(), nullable=False) -# cmt_author_affiliation = Column(String(), server_default="NULL") -# cmt_committer_name = Column(String(), nullable=False) -# cmt_committer_raw_email = Column(String(), nullable=False) -# cmt_committer_email = Column(String(), nullable=False) -# cmt_committer_date = Column(String(), nullable=False) -# cmt_committer_affiliation = Column(String(), server_default="NULL") -# cmt_added = Column(Integer, nullable=False) -# cmt_removed = Column(Integer, nullable=False) -# cmt_whitespace = Column(Integer, nullable=False) -# cmt_filename = Column(String(), nullable=False) -# cmt_date_attempted = Column(TIMESTAMP(), nullable=False) -# cmt_ght_author_id = Column(Integer) -# cmt_ght_committer_id = Column(Integer) -# cmt_ght_committed_at = Column(TIMESTAMP()) -# cmt_committer_timestamp = Column(TIMESTAMP(timezone=True)) -# cmt_author_timestamp = Column(TIMESTAMP(timezone=True)) -# # TODO: Appears that this foreign key is duplicated in the database -# cmt_author_platform_username = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# msg_ref = relationship("CommitCommentRef", back_populates="commit") - -# def get_messages(self): 
- -# messages = [] -# for msg_ref in self.msg_ref: -# messages.append(msg_ref.message) - -# return messages - -# __tablename__ = "commits" -# __table_args__ = ( -# Index("author_affiliation", cmt_author_affiliation, postgresql_using="hash"), -# Index("author_cntrb_id", cmt_ght_author_id), -# Index( -# "author_email,author_affiliation,author_date", -# cmt_author_email, -# cmt_author_affiliation, -# cmt_author_date, -# ), -# Index("author_raw_email", cmt_author_raw_email), -# Index("cmt-author-date-idx2", cmt_author_date), -# Index( -# "cmt_author_contrib_worker", -# cmt_author_name, -# cmt_author_email, -# cmt_author_date, -# postgresql_using="brin", -# ), -# Index( -# "cmt_commiter_contrib_worker", -# cmt_committer_name, -# cmt_committer_email, -# cmt_committer_date, -# postgresql_using="brin", -# ), -# Index("commited", cmt_id), -# Index( -# "commits_idx_cmt_email_cmt_date_cmt_name", -# cmt_author_email, -# cmt_author_date, -# cmt_author_name, -# ), -# Index( -# "commits_idx_repo_id_cmt_ema_cmt_dat_cmt_nam", -# repo_id, -# cmt_author_email, -# cmt_author_date, -# cmt_author_name, -# ), -# Index( -# "commits_idx_repo_id_cmt_ema_cmt_dat_cmt_nam2", -# repo_id, -# cmt_committer_email, -# cmt_committer_date, -# cmt_committer_name, -# ), -# Index( -# "committer_affiliation", cmt_committer_affiliation, postgresql_using="hash" -# ), -# Index( -# "committer_email,committer_affiliation,committer_date", -# cmt_committer_email, -# cmt_committer_affiliation, -# cmt_committer_date, -# ), -# Index("committer_raw_email", cmt_committer_raw_email), -# Index("repo_id,commit", repo_id, cmt_commit_hash), -# { -# "schema": "augur_data", -# "comment": "Commits.\nEach row represents changes to one FILE within a single commit. So you will encounter multiple rows per commit hash in many cases. ", -# }, -# ) - - -# # Current has varchar with length but I changed that -# class ContributorAffiliations(Base): -# ca_id = Column(BigInteger, primary_key=True, nullable=False) -# ca_domain = Column(String(), nullable=False) -# ca_start_date = Column(Date, server_default="1970-01-01") -# ca_last_used = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# ca_affiliation = Column(String()) -# ca_active = Column(SmallInteger, server_default=text("1")) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "contributor_affiliations" -# __table_args__ = ( -# UniqueConstraint("ca_domain", name="unique_domain"), -# { -# "schema": "augur_data", -# "comment": "This table exists outside of relations with other tables. The purpose is to provide a dynamic, owner maintained (and augur augmented) list of affiliations. This table is processed in affiliation information in the DM_ tables generated when Augur is finished counting commits using the Facade Worker. ", -# }, -# ) - - -# # TODO: Add foreign key to repo table on cntrb_repo_id - - -# class ContributorRepo(Base): -# cntrb_repo_id = Column(BigInteger, nullable=False) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_contributor_repo_contributors_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# comment="This is not null because what is the point without the contributor in this table? 
", -# ) -# repo_git = Column( -# String(), -# nullable=False, -# comment="Similar to cntrb_id, we need this data for the table to have meaningful data. ", -# ) -# repo_name = Column(String(), nullable=False) -# gh_repo_id = Column(BigInteger, nullable=False) -# cntrb_category = Column(String()) -# event_id = Column(BigInteger) -# created_at = Column(TIMESTAMP()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "contributor_repo" -# __table_args__ = ( -# PrimaryKeyConstraint("cntrb_repo_id", name="cntrb_repo_id_key"), -# UniqueConstraint("event_id", "tool_version", name="eventer"), -# {"schema": "augur_data"}, -# ) - - -# class Contributors(Base): -# cntrb_id = Column(BigInteger, primary_key=True, nullable=False) -# cntrb_login = Column( -# String(), -# comment="Will be a double population with the same value as gh_login for github, but the local value for other systems. ", -# ) -# cntrb_email = Column( -# String(), -# comment="This needs to be here for matching contributor ids, which are augur, to the commit information. ", -# ) -# cntrb_full_name = Column(String()) -# cntrb_company = Column(String()) -# cntrb_created_at = Column(TIMESTAMP()) -# cntrb_type = Column( -# String(), -# comment="Present in another models. It is not currently used in Augur. ", -# ) -# cntrb_fake = Column(SmallInteger, server_default=text("0")) -# cntrb_deleted = Column(SmallInteger, server_default=text("0")) -# cntrb_long = Column(Numeric(precision=11, scale=8)) -# cntrb_lat = Column(Numeric(precision=10, scale=8)) -# cntrb_country_code = Column(CHAR(length=3)) -# cntrb_state = Column(String()) -# cntrb_city = Column(String()) -# cntrb_location = Column(String()) -# cntrb_canonical = Column(String()) -# cntrb_last_used = Column(TIMESTAMP(timezone=True)) -# gh_user_id = Column(BigInteger) -# gh_login = Column( -# String(), -# comment="populated with the github user name for github originated data. 
", -# ) -# gh_url = Column(String()) -# gh_html_url = Column(String()) -# gh_node_id = Column(String()) -# gh_avatar_url = Column(String()) -# gh_gravatar_id = Column(String()) -# gh_followers_url = Column(String()) -# gh_following_url = Column(String()) -# gh_gists_url = Column(String()) -# gh_starred_url = Column(String()) -# gh_subscriptions_url = Column(String()) -# gh_organizations_url = Column(String()) -# gh_repos_url = Column(String()) -# gh_events_url = Column(String()) -# gh_received_events_url = Column(String()) -# gh_type = Column(String()) -# gh_site_admin = Column(String()) -# gl_web_url = Column(String()) -# gl_avatar_url = Column(String()) -# gl_state = Column(String()) -# gl_username = Column(String()) -# gl_full_name = Column(String()) -# gl_id = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# repos_contributed = relationship("ContributorRepo") -# aliases = relationship("ContributorsAliases") -# messages = relationship("Message") - -# __tablename__ = "contributors" -# __table_args__ = ( -# UniqueConstraint( -# "gh_login", name="GH-UNIQUE-C", initially="DEFERRED", deferrable=True -# ), -# UniqueConstraint( -# "gl_id", name="GL-UNIQUE-B", initially="DEFERRED", deferrable=True -# ), -# # unique key for gitlab users on insertion -# UniqueConstraint( -# "gl_username", name="GL-UNIQUE-C", initially="DEFERRED", deferrable=True -# ), -# # unique key to insert on for github -# UniqueConstraint("cntrb_login", name="GL-cntrb-LOGIN-UNIQUE"), -# Index("cnt-fullname", cntrb_full_name, postgresql_using="hash"), -# Index("cntrb-theemail", cntrb_email, postgresql_using="hash"), -# Index("cntrb_canonica-idx11", cntrb_canonical), -# Index("cntrb_login_platform_index", cntrb_login), -# Index( -# "contributor_delete_finder", cntrb_id, cntrb_email, postgresql_using="brin" -# ), -# Index("contributor_worker_email_finder", cntrb_email, postgresql_using="brin"), -# Index( -# "contributor_worker_finder", -# cntrb_login, -# cntrb_email, -# cntrb_id, -# postgresql_using="brin", -# ), -# # TODO: This index is the same as the first one but one has a different stuff -# Index( -# "contributor_worker_fullname_finder", -# cntrb_full_name, -# postgresql_using="brin", -# ), -# Index("contributors_idx_cntrb_email3", cntrb_email), -# # TODO: These last onese appear to be the same -# Index("login", cntrb_login), -# Index("login-contributor-idx", cntrb_login), -# { -# "schema": "augur_data", -# "comment": "For GitHub, this should be repeated from gh_login. for other systems, it should be that systems login.\nGithub now allows a user to change their login name, but their user id remains the same in this case. So, the natural key is the combination of id and login, but there should never be repeated logins. 
", -# }, -# ) - - -# class ContributorsAliases(Base): -# cntrb_alias_id = Column(BigInteger, primary_key=True, nullable=False) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_contributors_aliases_contributors_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# nullable=False, -# ) -# canonical_email = Column(String(), nullable=False) -# alias_email = Column(String(), nullable=False) -# cntrb_active = Column(SmallInteger, nullable=False, server_default=text("1")) -# cntrb_last_modified = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "contributors_aliases" -# __table_args__ = ( -# UniqueConstraint( -# "alias_email", -# "canonical_email", -# name="only-email-once", -# initially="DEFERRED", -# deferrable=True, -# ), -# { -# "schema": "augur_data", -# "comment": "Every open source user may have more than one email used to make contributions over time. Augur selects the first email it encounters for a user as its “canonical_email”. \n\nThe canonical_email is also added to the contributors_aliases table, with the canonical_email and alias_email being identical. Using this strategy, an email search will only need to join the alias table for basic email information, and can then more easily map the canonical email from each alias row to the same, more detailed information in the contributors table for a user. ", -# }, -# ) - - -# # TODO: Add relationship: Don't understand table well enough -# class DiscourseInsights(Base): -# msg_discourse_id = Column(BigInteger, primary_key=True, nullable=False) -# msg_id = Column( -# BigInteger, -# ForeignKey("augur_data.message.msg_id", name="fk_discourse_insights_message_1"), -# ) -# discourse_act = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "discourse_insights" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "This table is populated by the “Discourse_Analysis_Worker”. It examines sequential discourse, using computational linguistic methods, to draw statistical inferences regarding the discourse in a particular comment thread. 
", -# } - - -# # TODO: Add foreign keys to repo and repogroups - - -# class DmRepoAnnual(Base): -# dm_repo_annual_id = Column(BigInteger, primary_key=True) -# repo_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_annual" -# __table_args__ = ( -# Index("repo_id,affiliation_copy_1", repo_id, affiliation), -# Index("repo_id,email_copy_1", repo_id, email), -# {"schema": "augur_data"}, -# ) - - -# class DmRepoGroupAnnual(Base): -# dm_repo_group_annual_id = Column(BigInteger, primary_key=True) -# repo_group_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_group_annual" -# __table_args__ = ( -# Index("projects_id,affiliation_copy_1", repo_group_id, affiliation), -# Index("projects_id,email_copy_1", repo_group_id, email), -# {"schema": "augur_data"}, -# ) - - -# class DmRepoGroupMonthly(Base): -# dm_repo_group_monthly_id = Column(BigInteger, primary_key=True) -# repo_group_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# month = Column(SmallInteger, nullable=False) -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_group_monthly" -# __table_args__ = ( -# Index("projects_id,affiliation_copy_2", repo_group_id, affiliation), -# Index("projects_id,email_copy_2", repo_group_id, email), -# Index("projects_id,year,affiliation_copy_1", repo_group_id, year, affiliation), -# Index("projects_id,year,email_copy_1", repo_group_id, year, email), -# {"schema": "augur_data"}, -# ) - - -# class DmRepoGroupWeekly(Base): -# dm_repo_group_weekly_id = Column(BigInteger, primary_key=True) -# repo_group_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# week = Column(SmallInteger, nullable=False) -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = 
Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_group_weekly" -# __table_args__ = ( -# Index("projects_id,affiliation", repo_group_id, affiliation), -# Index("projects_id,email", repo_group_id, email), -# Index("projects_id,year,affiliation", repo_group_id, year, affiliation), -# Index("projects_id,year,email", repo_group_id, year, email), -# {"schema": "augur_data"}, -# ) - - -# class DmRepoMonthly(Base): -# dm_repo_monthly_id = Column(BigInteger, primary_key=True) -# repo_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# month = Column(SmallInteger, nullable=False) -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_monthly" -# __table_args__ = ( -# Index("repo_id,affiliation_copy_2", repo_id, affiliation), -# Index("repo_id,email_copy_2", repo_id, email), -# Index("repo_id,year,affiliation_copy_1", repo_id, year, affiliation), -# Index("repo_id,year,email_copy_1", repo_id, year, email), -# {"schema": "augur_data"}, -# ) - - -# class DmRepoWeekly(Base): -# dm_repo_weekly_id = Column(BigInteger, primary_key=True) -# repo_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# week = Column(SmallInteger, nullable=False) -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_weekly" -# __table_args__ = ( -# Index("repo_id,affiliation", repo_id, affiliation), -# Index("repo_id,email", repo_id, email), -# Index("repo_id,year,affiliation", repo_id, year, affiliation), -# Index("repo_id,year,email", repo_id, year, email), -# {"schema": "augur_data"}, -# ) - - -# class Exclude(Base): -# id = Column(Integer, primary_key=True, nullable=False) -# projects_id = Column(Integer, nullable=False) -# email = Column(String(), server_default="NULL") -# domain = Column(String(), server_default="NULL") - -# __tablename__ = "exclude" -# __table_args__ = {"schema": "augur_data"} - - -# # TODO: Add relationship for repo_id: I don't think the repo_id should be in this table, I think that behavior can be obtained by getting all the issues for a repo then all the issue assignees for those issues -# # TODO: Add relationship for cntrb_id -# class IssueAssignees(Base): -# issue_assignee_id = 
Column(BigInteger, primary_key=True, nullable=False) -# issue_id = Column( -# BigInteger, -# ForeignKey("augur_data.issues.issue_id", name="fk_issue_assignees_issues_1"), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_issue_assignee_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", name="fk_issue_assignees_contributors_1" -# ), -# ) -# issue_assignee_src_id = Column( -# BigInteger, -# comment="This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue events API in the issue_assignees embedded JSON object. We may discover it is an ID for the person themselves; but my hypothesis is that its not.", -# ) -# issue_assignee_src_node = Column( -# String(), -# comment="This character based identifier comes from the source. In the case of GitHub, it is the id that is the second field returned from the issue events API in the issue_assignees embedded JSON object. We may discover it is an ID for the person themselves; but my hypothesis is that its not.", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "issue_assignees" -# __table_args__ = ( -# Index("issue-cntrb-assign-idx-1", cntrb_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Add relationship for repo_id: I don't think the repo_id should be in this table, I think that behavior can be obtained by getting all the issues for a repo then all the issue assignees for those issues -# # TODO: Add relationship for cntrb_id - - -# class IssueEvents(Base): -# event_id = Column(BigInteger, primary_key=True, nullable=False) -# issue_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.issues.issue_id", -# name="fk_issue_events_issues_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_issue_events_repo", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_issue_events_contributors_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# action = Column(String(), nullable=False) -# action_commit_hash = Column(String()) -# created_at = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# node_id = Column( -# String(), -# comment="This should be renamed to issue_event_src_node_id, as its the varchar identifier in GitHub and likely common in other sources as well. However, since it was created before we came to this naming standard and workers are built around it, we have it simply named as node_id. Anywhere you see node_id in the schema, it comes from GitHubs terminology.", -# ) -# node_url = Column(String()) -# issue_event_src_id = Column( -# BigInteger, -# comment="This ID comes from the source. 
In the case of GitHub, it is the id that is the first field returned from the issue events API", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# platform_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.platform.pltfrm_id", -# name="fk_issue_event_platform_ide", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) - -# __tablename__ = "issue_events" -# __table_args__ = ( -# # contstraint to determine whether to insert or not -# UniqueConstraint("issue_id", "issue_event_src_id", name="unique_event_id_key"), -# Index("issue-cntrb-idx2", issue_event_src_id), -# Index("issue_events_ibfk_1", issue_id), -# Index("issue_events_ibfk_2", cntrb_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Add relationship for repo_id: I don't think the repo_id should be in this table, I think that behavior can be obtained by getting all the issues for a repo then all the issue assignees for those issues - - -# class IssueLabels(Base): -# issue_label_id = Column(BigInteger, primary_key=True, nullable=False) -# issue_id = Column( -# BigInteger, -# ForeignKey("augur_data.issues.issue_id", name="fk_issue_labels_issues_1"), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_issue_labels_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# label_text = Column(String()) -# label_description = Column(String()) -# label_color = Column(String()) -# label_src_id = Column( -# BigInteger, -# comment="This character based identifier (node) comes from the source. In the case of GitHub, it is the id that is the second field returned from the issue events API JSON subsection for issues.", -# ) -# label_src_node_id = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "issue_labels" -# __table_args__ = ( -# # insert on -# UniqueConstraint("label_src_id", "issue_id", name="unique_issue_label"), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Add replationship: for repo_id - - -# class IssueMessageRef(Base): -# issue_msg_ref_id = Column(BigInteger, primary_key=True, nullable=False) -# issue_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.issues.issue_id", -# name="fk_issue_message_ref_issues_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_repo_id_fk1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# msg_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.message.msg_id", -# name="fk_issue_message_ref_message_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# issue_msg_ref_src_node_id = Column( -# String(), -# comment="This character based identifier comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue comments API", -# ) -# issue_msg_ref_src_comment_id = Column( -# BigInteger, -# comment="This ID comes from the source. 
In the case of GitHub, it is the id that is the first field returned from the issue comments API", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# message = relationship("Message", back_populates="issue") -# issue = relationship("Issues", back_populates="msg_ref") - -# __tablename__ = "issue_message_ref" -# __table_args__ = ( -# # insert on -# UniqueConstraint( -# "issue_msg_ref_src_comment_id", "tool_source", name="repo-issue" -# ), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Add relationship for cntrb_id -# # should repo_id be allowed to be NULL? - - -# class Issues(Base): -# issue_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_issues_repo", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# reporter_id = Column( -# BigInteger, -# ForeignKey("augur_data.contributors.cntrb_id", name="fk_issues_contributors_2"), -# comment="The ID of the person who opened the issue. ", -# ) -# pull_request = Column(BigInteger) -# pull_request_id = Column(BigInteger) -# created_at = Column(TIMESTAMP()) -# issue_title = Column(String()) -# issue_body = Column(String()) -# cntrb_id = Column( -# BigInteger, -# ForeignKey("augur_data.contributors.cntrb_id", name="fk_issues_contributors_1"), -# comment="The ID of the person who closed the issue. ", -# ) -# comment_count = Column(BigInteger) -# updated_at = Column(TIMESTAMP()) -# closed_at = Column(TIMESTAMP()) -# due_on = Column(TIMESTAMP()) -# repository_url = Column(String()) -# issue_url = Column(String()) -# labels_url = Column(String()) -# comments_url = Column(String()) -# events_url = Column(String()) -# html_url = Column(String()) -# issue_state = Column(String()) -# issue_node_id = Column(String()) -# gh_issue_number = Column(BigInteger) -# gh_issue_id = Column(BigInteger) -# gh_user_id = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# assignees = relationship("IssueAssignees") -# events = relationship("IssueEvents") -# labels = relationship("IssueLabels") - -# msg_ref = relationship("IssueMessageRef", back_populates="issue") - -# def get_messages(self): - -# messages = [] -# for msg_ref in self.msg_ref: -# messages.append(msg_ref.message) - -# return messages - -# __tablename__ = "issues" -# __table_args__ = ( -# Index("issue-cntrb-dix2", cntrb_id), -# Index("issues_ibfk_1", repo_id), -# Index("issues_ibfk_2", reporter_id), -# Index("issues_ibfk_4", pull_request_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Should latest_release_timestamp be a timestamp -# class Libraries(Base): -# library_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_libraries_repo_1"), -# ) -# platform = Column(String()) -# name = Column(String()) -# created_timestamp = Column(TIMESTAMP()) -# updated_timestamp = Column(TIMESTAMP()) -# library_description = Column(String()) -# keywords = Column(String()) -# library_homepage = Column(String()) -# license = Column(String()) -# version_count = Column(Integer) -# latest_release_timestamp = Column(String()) -# latest_release_number = Column(String()) -# package_manager_id = Column(String()) -# 
dependency_count = Column(Integer) -# dependent_library_count = Column(Integer) -# primary_language = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# library_dependencies = relationship("LibraryDependecies") - -# # TODO: Should this be a one to one relationship with library version (this it what I defined it as)? -# library_version = relationship("LibraryVersion", back_populates="library") - -# __tablename__ = "libraries" -# __table_args__ = {"schema": "augur_data"} - - -# class LibraryDependecies(Base): -# lib_dependency_id = Column(BigInteger, primary_key=True, nullable=False) -# library_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.libraries.library_id", -# name="fk_library_dependencies_libraries_1", -# ), -# ) -# manifest_platform = Column(String()) -# manifest_filepath = Column(String()) -# manifest_kind = Column(String()) -# repo_id_branch = Column(String(), nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "library_dependencies" -# __table_args__ = (Index("REPO_DEP", library_id), {"schema": "augur_data"}) - - -# class LibraryVersion(Base): -# library_version_id = Column(BigInteger, primary_key=True, nullable=False) -# library_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.libraries.library_id", name="fk_library_version_libraries_1" -# ), -# ) -# library_platform = Column(String()) -# version_number = Column(String()) -# version_release_date = Column(TIMESTAMP()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# library = relationship("Libraries", back_populates="library_version") - -# __tablename__ = "library_version" -# __table_args__ = {"schema": "augur_data"} - - -# class LstmAnomalyModels(Base): -# model_id = Column(BigInteger, primary_key=True, nullable=False) -# model_name = Column(String()) -# model_description = Column(String()) -# look_back_days = Column(BigInteger) -# training_days = Column(BigInteger) -# batch_size = Column(BigInteger) -# metric = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# # TODO: Should this be a one to one relationship? -# model_result = relationship("LstmAnomalyResults") - -# __tablename__ = "lstm_anomaly_models" -# __table_args__ = {"schema": "augur_data"} - - -# class LstmAnomalyResults(Base): -# result_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_lstm_anomaly_results_repo_1"), -# ) -# repo_category = Column(String()) -# model_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.lstm_anomaly_models.model_id", -# name="fk_lstm_anomaly_results_lstm_anomaly_models_1", -# ), -# ) -# metric = Column(String()) -# contamination_factor = Column(Float()) -# mean_absolute_error = Column(Float()) -# remarks = Column(String()) -# metric_field = Column( -# String(), -# comment="This is a listing of all of the endpoint fields included in the generation of the metric. 
Sometimes there is one, sometimes there is more than one. This will list them all. ", -# ) -# mean_absolute_actual_value = Column(Float()) -# mean_absolute_prediction_value = Column(Float()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "lstm_anomaly_results" -# __table_args__ = {"schema": "augur_data"} - - -# # TODO: I don't think that repo_id needs to be included because this behavior could be achieved by Repo.ParentObj.msg_ref.message (ParentObj is things such as prs or issues) -# # TODO: Add relationship to repo group list serve table - - -# class Message(Base): -# msg_id = Column(BigInteger, primary_key=True, nullable=False) -# rgls_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo_groups_list_serve.rgls_id", -# name="fk_message_repo_groups_list_serve_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# platform_msg_id = Column(BigInteger) -# platform_node_id = Column(String()) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_message_repoid", -# ondelete="CASCADE", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_message_contributors_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# comment="Not populated for mailing lists. Populated for GitHub issues. ", -# ) -# msg_text = Column(String()) -# msg_timestamp = Column(TIMESTAMP()) -# msg_sender_email = Column(String()) -# msg_header = Column(String()) -# pltfrm_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.platform.pltfrm_id", -# name="fk_message_platform_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# # Used this thread to determine how to do one to many relationship with an extra middle table: https://stackoverflow.com/questions/35795717/flask-sqlalchemy-many-to-many-relationship-with-extra-field -# commit = relationship("CommitCommentRef", back_populates="message") -# issue = relationship("IssueMessageRef", back_populates="message") -# pull_request = relationship("PullRequestMessageRef", back_populates="message") -# pr_review = relationship("PullRequestReviewMessageRef", back_populates="message") - -# analysis = relationship("MessageAnalysis", back_populates="message") -# sentiment = relationship("MessageSentiment", back_populates="message") - -# __tablename__ = "message" -# __table_args__ = ( -# UniqueConstraint("platform_msg_id", "tool_source", name="gh-message"), -# Index("messagegrouper", msg_id, rgls_id, unique=True), -# Index("msg-cntrb-id-idx", cntrb_id), -# Index("platformgrouper", msg_id, pltfrm_id), -# {"schema": "augur_data"}, -# ) - - -# class MessageAnalysis(Base): -# msg_analysis_id = Column(BigInteger, primary_key=True, nullable=False) -# msg_id = Column( -# BigInteger, -# ForeignKey("augur_data.message.msg_id", name="fk_message_analysis_message_1"), -# ) -# worker_run_id = Column( -# BigInteger, -# comment="This column is used to indicate analyses run by a worker during the same execution period, and is useful for grouping, and time series analysis. 
", -# ) -# sentiment_score = Column( -# Float(), -# comment="A sentiment analysis score. Zero is neutral, negative numbers are negative sentiment, and positive numbers are positive sentiment. ", -# ) -# reconstruction_error = Column( -# Float(), -# comment="Each message is converted to a 250 dimensin doc2vec vector, so the reconstruction error is the difference between what the predicted vector and the actual vector.", -# ) -# novelty_flag = Column( -# Boolean(), -# comment="This is an analysis of the degree to which the message is novel when compared to other messages in a repository. For example when bots are producing numerous identical messages, the novelty score is low. It would also be a low novelty score when several people are making the same coment. ", -# ) -# feeck_flag = Column( -# Boolean(), -# comment="This exists to provide the user with an opportunity provide feeck on the resulting the sentiment scores. ", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# message = relationship("Message", back_populates="analysis") - -# __tablename__ = "message_analysis" -# __table_args__ = {"schema": "augur_data"} - - -# class MessageAnalysisSummary(Base): -# msg_summary_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", name="fk_message_analysis_summary_repo_1" -# ), -# ) -# worker_run_id = Column(BigInteger) -# positive_ratio = Column(Float()) -# negative_ratio = Column(Float()) -# novel_count = Column(BigInteger) -# period = Column(TIMESTAMP()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# # TODO: Ensure that this is a one to one relationship -# repo = relationship("Repo", back_populates="msg_analysis_summary") - -# __tablename__ = "message_analysis_summary" -# __table_args__ = {"schema": "augur_data"} - - -# class MessageSentiment(Base): -# msg_analysis_id = Column(BigInteger, primary_key=True, nullable=False) -# msg_id = Column( -# BigInteger, -# ForeignKey("augur_data.message.msg_id", name="fk_message_sentiment_message_1"), -# ) -# worker_run_id = Column( -# BigInteger, -# comment="This column is used to indicate analyses run by a worker during the same execution period, and is useful for grouping, and time series analysis. ", -# ) -# sentiment_score = Column( -# Float(), -# comment="A sentiment analysis score. Zero is neutral, negative numbers are negative sentiment, and positive numbers are positive sentiment. ", -# ) -# reconstruction_error = Column( -# Float(), -# comment="Each message is converted to a 250 dimensin doc2vec vector, so the reconstruction error is the difference between what the predicted vector and the actual vector.", -# ) -# novelty_flag = Column( -# Boolean(), -# comment="This is an analysis of the degree to which the message is novel when compared to other messages in a repository. For example when bots are producing numerous identical messages, the novelty score is low. It would also be a low novelty score when several people are making the same coment. ", -# ) -# feedback = Column( -# Boolean(), -# comment="This exists to provide the user with an opportunity provide feedback on the resulting the sentiment scores. 
", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# message = relationship("Message", back_populates="sentiment") - -# __tablename__ = "message_sentiment" -# __table_args__ = {"schema": "augur_data"} - - -# class MessageSentimentSummary(Base): -# msg_summary_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", name="fk_message_sentiment_summary_repo_1" -# ), -# ) -# worker_run_id = Column( -# BigInteger, -# comment='This value should reflect the worker_run_id for the messages summarized in the table. There is not a relation between these two tables for that purpose because its not *really*, relationaly a concept unless we create a third table for "worker_run_id", which we determined was unnecessarily complex. ', -# ) -# positive_ratio = Column(Float()) -# negative_ratio = Column(Float()) -# novel_count = Column( -# BigInteger, -# comment="The number of messages identified as novel during the analyzed period", -# ) -# period = Column( -# TIMESTAMP(), -# comment="The whole timeline is divided into periods based on the definition of time period for analysis, which is user specified. Timestamp of the first period to look at, until the end of messages at the data of execution. ", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# repo = relationship("Repo", back_populates="msg_sentiment_summary") - -# __tablename__ = "message_sentiment_summary" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "In a relationally perfect world, we would have a table called “message_sentiment_run” the incremented the “worker_run_id” for both message_sentiment and message_sentiment_summary. For now, we decided this was overkill. 
", -# } - - -# class Platform(Base): -# pltfrm_id = Column(BigInteger, nullable=False) -# pltfrm_name = Column(String()) -# pltfrm_version = Column(String()) -# pltfrm_release_date = Column(Date) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# pr_reviews = relationship("PullRequestReviews") - -# __tablename__ = "platform" -# __table_args__ = ( -# PrimaryKeyConstraint("pltfrm_id", name="theplat"), -# Index("plat", pltfrm_id, unique=True), -# {"schema": "augur_data"}, -# ) - - -# class PullRequestAnalysis(Base): -# pull_request_analysis_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_analysis_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# comment="It would be better if the pull request worker is run first to fetch the latest PRs before analyzing", -# ) -# merge_probability = Column( -# Numeric(precision=256, scale=250), -# comment="Indicates the probability of the PR being merged", -# ) -# mechanism = Column( -# String(), -# comment="the ML model used for prediction (It is XGBoost Classifier at present)", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) - -# pull_request = relationship("PullRequests", back_populates="analysis") - -# __tablename__ = "pull_request_analysis" -# __table_args__ = ( -# Index("pr_anal_idx", pull_request_id), -# Index("probability_idx", merge_probability.desc().nullslast()), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.assignees -# # TODO: Add relationship for cntrb_id - - -# class PullRequestAssignees(Base): -# pr_assignee_map_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_assignees_pull_requests_1", -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_request_assignees_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# contrib_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_assignees_contributors_1", -# ), -# ) -# pr_assignee_src_id = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_assignees" -# __table_args__ = ( -# Index("pr_meta_cntrb-idx", contrib_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.commits -# # TODO: Add relationship for cntrb_id -# class PullRequestCommits(Base): -# pr_cmt_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_commits_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# repo_id = 
Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_request_commits_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# pr_cmt_sha = Column( -# String(), -# comment="This is the commit SHA for a pull request commit. If the PR is not to the master branch of the main repository (or, in rare cases, from it), then you will NOT find a corresponding commit SHA in the commit table. (see table comment for further explanation). ", -# ) -# pr_cmt_node_id = Column(String()) -# pr_cmt_message = Column(String()) -# pr_cmt_comments_url = Column(String()) -# pr_cmt_author_cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pr_commit_cntrb_id", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# pr_cmt_timestamp = Column(TIMESTAMP()) -# pr_cmt_author_email = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_commits" -# __table_args__ = ( -# UniqueConstraint( -# "pull_request_id", "repo_id", "pr_cmt_sha", name="pr_commit_nk" -# ), -# { -# "schema": "augur_data", -# "comment": "Pull request commits are an enumeration of each commit associated with a pull request. \nNot all pull requests are from a branch or fork into master. \nThe commits table intends to count only commits that end up in the master branch (i.e., part of the deployed code base for a project).\nTherefore, there will be commit “SHA”’s in this table that are no associated with a commit SHA in the commits table. \nIn cases where the PR is to the master branch of a project, you will find a match. In cases where the PR does not involve the master branch, you will not find a corresponding commit SHA in the commits table. This is expected. ", -# }, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.events -# # TODO: Add relationship for cntrb_id - - -# class PullRequestEvents(Base): -# pr_event_id = Column(BigInteger, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_events_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fkprevent_repo_id", -# ondelete="RESTRICT", -# onupdate="RESTRICT", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_events_contributors_1", -# ), -# nullable=False, -# ) -# action = Column(String(), nullable=False) -# action_commit_hash = Column(String()) -# created_at = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# issue_event_src_id = Column( -# BigInteger, -# comment="This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue events API", -# ) -# node_id = Column( -# String(), -# comment="This should be renamed to issue_event_src_node_id, as its the varchar identifier in GitHub and likely common in other sources as well. However, since it was created before we came to this naming standard and workers are built around it, we have it simply named as node_id. 
Anywhere you see node_id in the schema, it comes from GitHubs terminology.", -# ) -# node_url = Column(String()) -# platform_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.platform.pltfrm_id", -# name="fkpr_platform", -# ondelete="RESTRICT", -# onupdate="RESTRICT", -# initially="DEFERRED", -# deferrable=True, -# ), -# server_default=text("25150"), -# ) -# pr_platform_event_id = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_events" -# __table_args__ = ( -# PrimaryKeyConstraint("pr_event_id", name="pr_events_pkey"), -# UniqueConstraint( -# "pr_platform_event_id", "platform_id", name="unique-pr-event-id" -# ), -# Index("pr_events_ibfk_1", pull_request_id), -# Index("pr_events_ibfk_2", cntrb_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.files -# class PullRequestFiles(Base): -# pr_file_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_commits_pull_requests_1_copy_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_request_files_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# pr_file_additions = Column(BigInteger) -# pr_file_deletions = Column(BigInteger) -# pr_file_path = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_files" -# __table_args__ = ( -# # TODO: Confirm: Values to determine if insert needed -# UniqueConstraint( -# "pull_request_id", "repo_id", "pr_file_path", name="prfiles_unique" -# ), -# { -# "schema": "augur_data", -# "comment": "Pull request commits are an enumeration of each commit associated with a pull request. \nNot all pull requests are from a branch or fork into master. \nThe commits table intends to count only commits that end up in the master branch (i.e., part of the deployed code base for a project).\nTherefore, there will be commit “SHA”’s in this table that are no associated with a commit SHA in the commits table. \nIn cases where the PR is to the master branch of a project, you will find a match. In cases where the PR does not involve the master branch, you will not find a corresponding commit SHA in the commits table. This is expected. 
", -# }, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.labels - - -# class PullRequestLabels(Base): -# pr_label_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_labels_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_request_labels_repo", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# pr_src_id = Column(BigInteger) -# pr_src_node_id = Column(String()) -# pr_src_url = Column(String()) -# pr_src_description = Column(String()) -# pr_src_color = Column(String()) -# pr_src_default_bool = Column(Boolean()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_labels" -# __table_args__ = ( -# # TODO: Confirm: Values to determine if insert needed -# UniqueConstraint("pr_src_id", "pull_request_id", name="unique-pr-src-label-id"), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.msg_ref - - -# class PullRequestMessageRef(Base): -# pr_msg_ref_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_message_ref_pull_requests_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pr_repo", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# msg_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.message.msg_id", -# name="fk_pull_request_message_ref_message_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# pr_message_ref_src_comment_id = Column(BigInteger) -# pr_message_ref_src_node_id = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# pr_issue_url = Column(String()) - -# message = relationship("Message", back_populates="pull_request") -# pull_request = relationship("PullRequests", back_populates="msg_ref") - -# __tablename__ = "pull_request_message_ref" -# __table_args__ = ( -# # TODO: Confirm: Values to determine if insert needed -# UniqueConstraint( -# "pr_message_ref_src_comment_id", "tool_source", name="pr-comment-nk" -# ), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.meta_data - - -# class PullRequestMeta(Base): -# pr_repo_meta_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_meta_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_request_repo_meta_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# 
initially="DEFERRED", -# deferrable=True, -# ), -# ) -# pr_head_or_base = Column( -# String(), -# comment="Each pull request should have one and only one head record; and one and only one base record. ", -# ) -# pr_src_meta_label = Column(String()) -# pr_src_meta_ref = Column(String()) -# pr_sha = Column(String()) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_meta_contributors_2", -# ), -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_meta" -# __table_args__ = ( -# Index("pr_meta-cntrbid-idx", cntrb_id), -# { -# "schema": "augur_data", -# "comment": 'Pull requests contain referencing metadata. There are a few columns that are discrete. There are also head and base designations for the repo on each side of the pull request. Similar functions exist in GitLab, though the language here is based on GitHub. The JSON Being adapted to as of the development of this schema is here: "base": { "label": "chaoss:dev", "ref": "dev", "sha": "dc6c6f3947f7dc84ecba3d8bda641ef786e7027d", "user": { "login": "chaoss", "id": 29740296, "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", "gravatar_id": "", "url": "https://api.github.com/users/chaoss", "html_url": "https://github.com/chaoss", "followers_url": "https://api.github.com/users/chaoss/followers", "following_url": "https://api.github.com/users/chaoss/following{/other_user}", "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", "organizations_url": "https://api.github.com/users/chaoss/orgs", "repos_url": "https://api.github.com/users/chaoss/repos", "events_url": "https://api.github.com/users/chaoss/events{/privacy}", "received_events_url": "https://api.github.com/users/chaoss/received_events", "type": "Organization", "site_admin": false }, "repo": { "id": 78134122, "node_id": "MDEwOlJlcG9zaXRvcnk3ODEzNDEyMg==", "name": "augur", "full_name": "chaoss/augur", "private": false, "owner": { "login": "chaoss", "id": 29740296, "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", "gravatar_id": "", "url": "https://api.github.com/users/chaoss", "html_url": "https://github.com/chaoss", "followers_url": "https://api.github.com/users/chaoss/followers", "following_url": "https://api.github.com/users/chaoss/following{/other_user}", "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", "organizations_url": "https://api.github.com/users/chaoss/orgs", "repos_url": "https://api.github.com/users/chaoss/repos", "events_url": "https://api.github.com/users/chaoss/events{/privacy}", "received_events_url": "https://api.github.com/users/chaoss/received_events", "type": "Organization", "site_admin": false }, ', -# }, -# ) - - -# # TODO: Don't know enough about table structure to create relationship - - -# class PullRequestRepo(Base): -# pr_repo_id = Column(BigInteger, primary_key=True, nullable=False) -# pr_repo_meta_id = Column( -# BigInteger, -# ForeignKey( -# 
"augur_data.pull_request_meta.pr_repo_meta_id", -# name="fk_pull_request_repo_pull_request_meta_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# pr_repo_head_or_base = Column( -# String(), -# comment="For ease of validation checking, we should determine if the repository referenced is the head or base of the pull request. Each pull request should have one and only one of these, which is not enforcable easily in the database.", -# ) -# pr_src_repo_id = Column(BigInteger) -# pr_src_node_id = Column(String()) -# pr_repo_name = Column(String()) -# pr_repo_full_name = Column(String()) -# pr_repo_private_bool = Column(Boolean()) -# pr_cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_repo_contributors_1", -# ), -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_repo" -# __table_args__ = ( -# Index("pr-cntrb-idx-repo", pr_cntrb_id), -# { -# "schema": "augur_data", -# "comment": "This table is for storing information about forks that exist as part of a pull request. Generally we do not want to track these like ordinary repositories. ", -# }, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.reviews.msg_ref - - -# class PullRequestReviewMessageRef(Base): -# pr_review_msg_ref_id = Column(BigInteger, nullable=False) -# pr_review_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_request_reviews.pr_review_id", -# name="fk_pull_request_review_message_ref_pull_request_reviews_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# nullable=False, -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_review_repo", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# msg_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.message.msg_id", -# name="fk_pull_request_review_message_ref_message_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# nullable=False, -# ) -# pr_review_msg_url = Column(String()) -# pr_review_src_id = Column(BigInteger) -# pr_review_msg_src_id = Column(BigInteger) -# pr_review_msg_node_id = Column(String()) -# pr_review_msg_diff_hunk = Column(String()) -# pr_review_msg_path = Column(String()) -# pr_review_msg_position = Column(BigInteger) -# pr_review_msg_original_position = Column(BigInteger) -# pr_review_msg_commit_id = Column(String()) -# pr_review_msg_original_commit_id = Column(String()) -# pr_review_msg_updated_at = Column(TIMESTAMP()) -# pr_review_msg_html_url = Column(String()) -# pr_url = Column(String()) -# pr_review_msg_author_association = Column(String()) -# pr_review_msg_start_line = Column(BigInteger) -# pr_review_msg_original_start_line = Column(BigInteger) -# pr_review_msg_start_side = Column(String()) -# pr_review_msg_line = Column(BigInteger) -# pr_review_msg_original_line = Column(BigInteger) -# pr_review_msg_side = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# message = relationship("Message", back_populates="pr_review") -# pr_review = 
relationship("PullRequestReviews", back_populates="msg_ref") - -# __tablename__ = "pull_request_review_message_ref" -# __table_args__ = ( -# PrimaryKeyConstraint("pr_review_msg_ref_id", name="pr_review_msg_ref_id"), -# UniqueConstraint("pr_review_msg_src_id", "tool_source", name="pr-review-nk"), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.reviewers -# # TODO: Add cntrb_id relationship (don't understand table well enough) - - -# class PullRequestReviewers(Base): -# pr_reviewer_map_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_reviewers_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# pr_source_id = Column( -# BigInteger, -# comment="The platform ID for the pull/merge request. Used as part of the natural key, along with pr_reviewer_src_id in this table. ", -# ) -# repo_id = Column(BigInteger) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_reviewers_contributors_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# pr_reviewer_src_id = Column( -# BigInteger, -# comment="The platform ID for the pull/merge request reviewer. Used as part of the natural key, along with pr_source_id in this table. ", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_reviewers" -# __table_args__ = ( -# UniqueConstraint( -# "pr_source_id", -# "pr_reviewer_src_id", -# name="unique_pr_src_reviewer_key", -# initially="DEFERRED", -# deferrable=True, -# ), -# Index("pr-reviewers-cntrb-idx1", cntrb_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.reviews -# # TODO: Add relationship for cntrb_id - - -# class PullRequestReviews(Base): -# pr_review_id = Column(BigInteger, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_reviews_pull_requests_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_repo_review", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_reviews_contributors_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# pr_review_author_association = Column(String()) -# pr_review_state = Column(String()) -# pr_review_body = Column(String()) -# pr_review_submitted_at = Column(TIMESTAMP()) -# pr_review_src_id = Column(BigInteger) -# pr_review_node_id = Column(String()) -# pr_review_html_url = Column(String()) -# pr_review_pull_request_url = Column(String()) -# pr_review_commit_id = Column(String()) -# platform_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.platform.pltfrm_id", -# name="fk-review-platform", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# server_default=text("25150"), -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# 
data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# msg_ref = relationship("PullRequestReviewMessageRef", back_populates="pr_review") - -# def get_messages(self): - -# messages = [] -# for msg_ref in self.msg_ref: -# messages.append(msg_ref.message) - -# return messages - -# __tablename__ = "pull_request_reviews" -# __table_args__ = ( -# PrimaryKeyConstraint("pr_review_id", name="pull_request_review_id"), -# UniqueConstraint("pr_review_src_id", "tool_source", name="sourcepr-review-id"), -# {"schema": "augur_data"}, -# ) - - -# class PullRequestTeams(Base): -# pr_team_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_teams_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# pr_src_team_id = Column(BigInteger) -# pr_src_team_node = Column(String()) -# pr_src_team_url = Column(String()) -# pr_team_name = Column(String()) -# pr_team_slug = Column(String()) -# pr_team_description = Column(String()) -# pr_team_privacy = Column(String()) -# pr_team_permission = Column(String()) -# pr_team_src_members_url = Column(String()) -# pr_team_src_repositories_url = Column(String()) -# pr_team_parent_id = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_teams" -# __table_args__ = {"schema": "augur_data"} - - -# class PullRequests(Base): -# pull_request_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_requests_repo_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# server_default=text("0"), -# ) -# pr_url = Column(String()) -# pr_src_id = Column( -# BigInteger, comment="The pr_src_id is unique across all of github." -# ) -# pr_src_node_id = Column(String()) -# pr_html_url = Column(String()) -# pr_diff_url = Column(String()) -# pr_patch_url = Column(String()) -# pr_issue_url = Column(String()) -# pr_augur_issue_id = Column( -# BigInteger, comment="This is to link to the augur stored related issue" -# ) -# pr_src_number = Column( -# BigInteger, comment="The pr_src_number is unique within a repository." -# ) -# pr_src_state = Column(String()) -# pr_src_locked = Column(Boolean()) -# pr_src_title = Column(String()) -# pr_augur_contributor_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pr_contribs", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# comment="This is to link to the augur contributor record. ", -# ) -# pr_body = Column(Text()) -# pr_created_at = Column(TIMESTAMP()) -# pr_updated_at = Column(TIMESTAMP()) -# pr_closed_at = Column(TIMESTAMP()) -# pr_merged_at = Column(TIMESTAMP()) -# pr_merge_commit_sha = Column(String()) -# pr_teams = Column(BigInteger, comment="One to many with pull request teams. ") -# pr_milestone = Column(String()) -# pr_commits_url = Column(String()) -# pr_review_comments_url = Column(String()) -# pr_review_comment_url = Column( -# String(), -# comment="This is a field with limited utility. It does expose how to access a specific comment if needed with parameters. 
If the source changes URL structure, it may be useful", -# ) -# pr_comments_url = Column(String()) -# pr_statuses_url = Column(String()) -# pr_meta_head_id = Column( -# String(), -# comment="The metadata for the head repo that links to the pull_request_meta table. ", -# ) -# pr_meta_base_id = Column( -# String(), -# comment="The metadata for the base repo that links to the pull_request_meta table. ", -# ) -# pr_src_issue_url = Column(String()) -# pr_src_comments_url = Column(String()) -# pr_src_review_comments_url = Column(String()) -# pr_src_commits_url = Column(String()) -# pr_src_statuses_url = Column(String()) -# pr_src_author_association = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# analysis = relationship("PullRequestAnalysis", back_populates="pull_request") -# assignees = relationship("PullRequestAssignees") -# commits = relationship("PullRequestCommits") -# events = relationship("PullRequestEvents") -# files = relationship("PullRequestFiles") -# labels = relationship("PullRequestLabels") -# msg_ref = relationship("PullRequestMessageRef", back_populates="pull_request") -# meta_data = relationship("PullRequestMeta") -# reviewers = relationship("PullRequestReviewers") -# reviews = relationship("PullRequestReviews") -# teams = relationship("PullRequestTeams") - -# def get_messages(self): - -# messages = [] -# for msg_ref in self.msg_ref: -# messages.append(msg_ref.message) - -# return messages - -# __tablename__ = "pull_requests" -# __table_args__ = ( -# Index( -# "id_node", pr_src_id.desc().nullsfirst(), pr_src_node_id.desc().nullsfirst() -# ), -# Index("pull_requests_idx_repo_id_data_datex", repo_id, data_collection_date), -# {"schema": "augur_data"}, -# ) - - -# class Releases(Base): -# release_id = Column(CHAR(length=64), primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_releases_repo_1"), -# nullable=False, -# ) -# release_name = Column(String()) -# release_description = Column(String()) -# release_author = Column(String()) -# release_created_at = Column(TIMESTAMP()) -# release_published_at = Column(TIMESTAMP()) -# release_updated_at = Column(TIMESTAMP()) -# release_is_draft = Column(Boolean()) -# release_is_prerelease = Column(Boolean()) -# release_tag_name = Column(String()) -# release_url = Column(String()) -# tag_only = Column(Boolean()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "releases" -# __table_args__ = {"schema": "augur_data"} - - -# class Repo(Base): -# repo_id = Column(BigInteger, nullable=False) -# repo_group_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo_groups.repo_group_id", name="fk_repo_repo_groups_1" -# ), -# nullable=False, -# ) -# repo_git = Column(String(), nullable=False) -# repo_path = Column(String(), server_default="NULL") -# repo_name = Column(String(), server_default="NULL") -# repo_added = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# repo_status = Column(String(), nullable=False, server_default="New") -# repo_type = Column( -# String(), -# server_default="", -# comment='This field is intended to indicate if the repository is the "main instance" of a repository in cases where implementations 
choose to add the same repository to more than one repository group. In cases where the repository group is of rg_type Github Organization then this repo_type should be "primary". In other cases the repo_type should probably be "user created". We made this a varchar in order to hold open the possibility that there are additional repo_types we have not thought about. ', -# ) -# url = Column(String()) -# owner_id = Column(Integer) -# description = Column(String()) -# primary_language = Column(String()) -# created_at = Column(String()) -# forked_from = Column(String()) -# updated_at = Column(TIMESTAMP()) -# repo_archived_date_collected = Column(TIMESTAMP(timezone=True)) -# repo_archived = Column(Integer) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# commits = relationship("Commits") -# issues = relationship("Issues") -# pull_requests = relationship("PullRequests") -# libraries = relationship("Libraries") -# messages = relationship("Message") - -# pr_assignees = relationship("PullRequestAssignees") -# pr_commits = relationship("PullRequestCommits") -# pr_events = relationship("PullRequestEvents") -# pr_files = relationship("PullRequestFiles") -# pr_labels = relationship("PullRequestLabels") -# pr_meta_data = relationship("PullRequestMeta") -# pr_reviews = relationship("PullRequestReviews") - -# msg_analysis_summary = relationship("MessageAnalysisSummary", back_populates="repo") -# msg_sentiment_summary = relationship( -# "MessageSentimentSummary", back_populates="repo" -# ) - -# lstm_anomaly_results = relationship("LstmAnomalyResults") - -# releases = relationship("Releases") -# badges = relationship("RepoBadging") -# cluster_messages = relationship("RepoClusterMessages") -# dependencies = relationship("RepoDependencies") -# deps_libyear = relationship("RepoDepsLibyear") -# deps_scorecard_id = relationship("RepoDepsScorecard") - -# info = relationship("RepoInfo") -# insights = relationship("RepoInsights") -# insight_records = relationship("RepoInsightsRecords") - -# labor = relationship("RepoLabor") -# meta_data = relationship("RepoMeta") -# sbom_scans = relationship("RepoSbomScans") -# stats = relationship("RepoStats") -# topic = relationship("RepoTopic") - -# __tablename__ = "repo" -# __table_args__ = ( -# PrimaryKeyConstraint("repo_id", name="repounique"), -# Index("forked", forked_from), -# Index("repo_idx_repo_id_repo_namex", repo_id, repo_name), -# Index("repogitindexrep", repo_git), -# Index("reponameindex", repo_name, postgresql_using="hash"), -# Index("reponameindexbtree", repo_name), -# Index("rggrouponrepoindex", repo_group_id), -# Index("therepo", repo_id, unique=True), -# { -# "schema": "augur_data", -# "comment": "This table is a combination of the columns in Facade’s repo table and GHTorrent’s projects table. 
", -# }, -# ) - - -# class RepoBadging(Base): -# badge_collection_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_badging_repo_1"), -# ) -# created_at = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# data = Column(JSONB()) - -# __tablename__ = "repo_badging" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "This will be collected from the LF’s Badging API\nhttps://bestpractices.coreinfrastructure.org/projects.json?pq=https%3A%2F%2Fgithub.com%2Fchaoss%2Faugur\n", -# } - - -# class RepoClusterMessages(Base): -# msg_cluster_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_cluster_messages_repo_1"), -# ) -# cluster_content = Column(Integer) -# cluster_mechanism = Column(Integer) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_cluster_messages" -# __table_args__ = {"schema": "augur_data"} - - -# class RepoDependencies(Base): -# repo_dependencies_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="repo_id"), -# comment="Forign key for repo id. ", -# ) -# dep_name = Column(String(), comment="Name of the dependancy found in project. ") -# dep_count = Column(Integer, comment="Number of times the dependancy was found. ") -# dep_language = Column(String(), comment="Language of the dependancy. 
") -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_dependencies" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "Contains the dependencies for a repo.", -# } - - -# # TODO: typo in field current_verion -# class RepoDepsLibyear(Base): -# repo_deps_libyear_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, ForeignKey("augur_data.repo.repo_id", name="repo_id_copy_2") -# ) -# name = Column(String()) -# requirement = Column(String()) -# type = Column(String()) -# package_manager = Column(String()) -# current_verion = Column(String()) -# latest_version = Column(String()) -# current_release_date = Column(String()) -# latest_release_date = Column(String()) -# libyear = Column(Float()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_deps_libyear" -# __table_args__ = {"schema": "augur_data"} - - -# class RepoDepsScorecard(Base): -# repo_deps_scorecard_id = Column(BigInteger, nullable=False) -# repo_id = Column( -# BigInteger, ForeignKey("augur_data.repo.repo_id", name="repo_id_copy_1") -# ) -# name = Column(String()) -# status = Column(String()) -# score = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_deps_scorecard" -# __table_args__ = ( -# PrimaryKeyConstraint( -# "repo_deps_scorecard_id", name="repo_deps_scorecard_pkey1" -# ), -# {"schema": "augur_data"}, -# ) - - -# class RepoGroupInsights(Base): -# rgi_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_group_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo_groups.repo_group_id", -# name="fk_repo_group_insights_repo_groups_1", -# ), -# ) -# rgi_metric = Column(String()) -# rgi_value = Column(String()) -# cms_id = Column(BigInteger) -# rgi_fresh = Column( -# Boolean(), -# comment='false if the date is before the statistic that triggered the insight, true if after. This allows us to automatically display only "fresh insights" and avoid displaying "stale insights". The insight worker will populate this table. ', -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# repo_group = relationship("RepoGroups") - -# __tablename__ = "repo_group_insights" -# __table_args__ = { -# "schema": "augur_data", -# "comment": 'This table is output from an analytical worker inside of Augur. It runs through the different metrics on a REPOSITORY_GROUP and identifies the five to ten most “interesting” metrics as defined by some kind of delta or other factor. The algorithm is going to evolve. \n\nWorker Design Notes: The idea is that the "insight worker" will scan through a bunch of active metrics or "synthetic metrics" to list the most important insights. 
', -# } - - -# class RepoGroups(Base): -# repo_group_id = Column(BigInteger, nullable=False) -# rg_name = Column(String(), nullable=False) -# rg_description = Column(String(), server_default="NULL") -# rg_website = Column(String(), server_default="NULL") -# rg_recache = Column(SmallInteger, server_default=text("1")) -# rg_last_modified = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# rg_type = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# repos = relationship("Repo") -# rg_list_serve = relationship("RepoGroupsListServe") - -# __tablename__ = "repo_groups" -# __table_args__ = ( -# PrimaryKeyConstraint("repo_group_id", name="rgid"), -# Index("rgidm", repo_group_id, unique=True), -# Index("rgnameindex", rg_name), -# { -# "schema": "augur_data", -# "comment": "rg_type is intended to be either a GitHub Organization or a User Created Repo Group. ", -# }, -# ) - - -# class RepoGroupsListServe(Base): -# rgls_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_group_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo_groups.repo_group_id", -# name="fk_repo_groups_list_serve_repo_groups_1", -# ), -# nullable=False, -# ) -# rgls_name = Column(String()) -# rgls_description = Column(String()) -# rgls_sponsor = Column(String()) -# rgls_email = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_groups_list_serve" -# __table_args__ = ( -# UniqueConstraint("rgls_id", "repo_group_id", name="rglistserve"), -# Index("lister", rgls_id, repo_group_id, unique=True), -# {"schema": "augur_data"}, -# ) - - -# class RepoInfo(Base): -# repo_info_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_info_repo_1"), -# nullable=False, -# ) -# last_updated = Column(TIMESTAMP()) -# issues_enabled = Column(String()) -# open_issues = Column(Integer) -# pull_requests_enabled = Column(String()) -# wiki_enabled = Column(String()) -# pages_enabled = Column(String()) -# fork_count = Column(Integer) -# default_branch = Column(String()) -# watchers_count = Column(Integer) -# UUID = Column(Integer) -# license = Column(String()) -# stars_count = Column(Integer) -# committers_count = Column(Integer) -# issue_contributors_count = Column(String()) -# changelog_file = Column(String()) -# contributing_file = Column(String()) -# license_file = Column(String()) -# code_of_conduct_file = Column(String()) -# security_issue_file = Column(String()) -# security_audit_file = Column(String()) -# status = Column(String()) -# keywords = Column(String()) -# commit_count = Column(BigInteger) -# issues_count = Column(BigInteger) -# issues_closed = Column(BigInteger) -# pull_request_count = Column(BigInteger) -# pull_requests_open = Column(BigInteger) -# pull_requests_closed = Column(BigInteger) -# pull_requests_merged = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_info" -# __table_args__ = ( -# # TODO: Their appears to be two of the same index in current database 
-# Index("repo_info_idx_repo_id_data_date_1x", repo_id, data_collection_date), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Why is numeric defined without level or precision? -# class RepoInsights(Base): -# ri_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_insights_repo_1"), -# ) -# ri_metric = Column(String()) -# ri_value = Column(String()) -# ri_date = Column(TIMESTAMP()) -# ri_fresh = Column( -# Boolean(), -# comment='false if the date is before the statistic that triggered the insight, true if after. This allows us to automatically display only "fresh insights" and avoid displaying "stale insights". The insight worker will populate this table. ', -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# ri_score = Column(Numeric()) -# ri_field = Column(String()) -# ri_detection_method = Column(String()) - -# __tablename__ = "repo_insights" -# __table_args__ = { -# "schema": "augur_data", -# "comment": 'This table is output from an analytical worker inside of Augur. It runs through the different metrics on a repository and identifies the five to ten most “interesting” metrics as defined by some kind of delta or other factor. The algorithm is going to evolve. \n\nWorker Design Notes: The idea is that the "insight worker" will scan through a bunch of active metrics or "synthetic metrics" to list the most important insights. ', -# } - - -# class RepoInsightsRecords(Base): -# ri_id = Column( -# BigInteger, primary_key=True, nullable=False, comment="Primary key. " -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="repo_id_ref", -# ondelete="SET NULL", -# onupdate="CASCADE", -# ), -# comment="Refers to repo table primary key. Will have a foreign key", -# ) -# ri_metric = Column(String(), comment="The metric endpoint") -# ri_field = Column(String(), comment="The field in the metric endpoint") -# ri_value = Column(String(), comment="The value of the endpoint in ri_field") -# ri_date = Column( -# TIMESTAMP(), -# comment="The date the insight is for; in other words, some anomaly occurred on this date. ", -# ) -# ri_score = Column(Float(), comment="A Score, derived from the algorithm used. ") -# ri_detection_method = Column( -# String(), -# comment='A confidence interval or other expression of the type of threshold and the value of a threshold met in order for it to be "an insight". Example. "95% confidence interval". 
', -# ) -# tool_source = Column(String(), comment="Standard Augur Metadata") -# tool_version = Column(String(), comment="Standard Augur Metadata") -# data_source = Column(String(), comment="Standard Augur Metadata") -# data_collection_date = Column( -# TIMESTAMP(), -# server_default=func.current_timestamp(), -# comment="Standard Augur Metadata", -# ) - -# __tablename__ = "repo_insights_records" -# __table_args__ = (Index("dater", ri_date), {"schema": "augur_data"}) - - -# class RepoLabor(Base): -# repo_labor_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_labor_repo_1"), -# ) -# repo_clone_date = Column(TIMESTAMP()) -# rl_analysis_date = Column(TIMESTAMP()) -# programming_language = Column(String()) -# file_path = Column(String()) -# file_name = Column(String()) -# total_lines = Column(Integer) -# code_lines = Column(Integer) -# comment_lines = Column(Integer) -# blank_lines = Column(Integer) -# code_complexity = Column(Integer) -# repo_url = Column( -# String(), -# comment="This is a convenience column to simplify analysis against external datasets", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_labor" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "repo_labor is a derivative of tables used to store scc code and complexity counting statistics that are inputs to labor analysis, which are components of CHAOSS value metric calculations. ", -# } - - -# class RepoMeta(Base): -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_meta_repo_1"), -# primary_key=True, -# nullable=False, -# ) -# rmeta_id = Column(BigInteger, primary_key=True, nullable=False) -# rmeta_name = Column(String()) -# rmeta_value = Column(String(), server_default=text("0")) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_meta" -# __table_args__ = {"schema": "augur_data", "comment": "Project Languages"} - - -# class RepoSbomScans(Base): -# rsb_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# Integer, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="repo_linker_sbom", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# sbom_scan = Column(JSON()) - -# __tablename__ = "repo_sbom_scans" -# __table_args__ = {"schema": "augur_data"} - - -# class RepoStats(Base): -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_stats_repo_1"), -# primary_key=True, -# nullable=False, -# ) -# rstat_id = Column(BigInteger, primary_key=True, nullable=False) -# rstat_name = Column(String()) -# rstat_value = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_stats" -# __table_args__ = {"schema": "augur_data", "comment": "Project Watchers"} - - -# class RepoTestCoverage(Base): -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_test_coverage_repo_1"), -# primary_key=True, -# nullable=False, -# ) -# repo_clone_date = Column(TIMESTAMP()) -# rtc_analysis_date = 
Column(TIMESTAMP()) -# programming_language = Column(String()) -# file_path = Column(String()) -# file_name = Column(String()) -# testing_tool = Column(String()) -# file_statement_count = Column(BigInteger) -# file_subroutine_count = Column(BigInteger) -# file_statements_tested = Column(BigInteger) -# file_subroutines_tested = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_test_coverage" -# __table_args__ = {"schema": "augur_data"} - - -# class RepoTopic(Base): -# repo_topic_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# Integer, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_topic_repo_1"), -# ) -# topic_id = Column(Integer) -# topic_prob = Column(Float()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_topic" -# __table_args__ = {"schema": "augur_data"} - - -# # TODO: Add foreign key to repo table - - -# class ReposFetchLog(Base): -# repos_fetch_log_id = Column(BigInteger, primary_key=True) -# repos_id = Column(Integer, nullable=False) -# status = Column(String(), nullable=False) -# date = Column(TIMESTAMP(), nullable=False, server_default=func.current_timestamp()) - -# __tablename__ = "repos_fetch_log" -# __table_args__ = ( -# # TODO: There appear to be two identical indexes -# Index("repos_id,status", repos_id, status), -# {"schema": "augur_data"}, -# ) - - -# class Settings(Base): -# id = Column(Integer, primary_key=True, nullable=False) -# setting = Column(String(), nullable=False) -# value = Column(String(), nullable=False) -# last_modified = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) - -# __tablename__ = "settings" -# __table_args__ = {"schema": "augur_data"} - - -# class TopicWords(Base): -# topic_words_id = Column(BigInteger, primary_key=True, nullable=False) -# topic_id = Column(BigInteger) -# word = Column(String()) -# word_prob = Column(Float()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "topic_words" -# __table_args__ = {"schema": "augur_data"} - - -# # TODO: Add foreign key to repo_group table - - -# class UnknownCache(Base): -# unknown_cache_id = Column(BigInteger, primary_key=True) -# type = Column(String(), nullable=False) -# repo_group_id = Column(Integer, nullable=False) -# email = Column(String(), nullable=False) -# domain = Column(String(), server_default="NULL") -# added = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "unknown_cache" -# __table_args__ = ( -# Index("type,projects_id", type, repo_group_id), -# {"schema": "augur_data"}, -# ) - - -# class UnresolvedCommitEmails(Base): -# email_unresolved_id = Column(BigInteger, primary_key=True, nullable=False) -# email = Column(String(), nullable=False) -# name = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = 
Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "unresolved_commit_emails" -# __table_args__ = ( -# UniqueConstraint("email", name="unresolved_commit_emails_email_key"), -# {"schema": "augur_data"}, -# ) - - -# class UtilityLog(Base): -# id = Column(BigInteger, primary_key=True, nullable=False) -# level = Column(String(), nullable=False) -# status = Column(String(), nullable=False) -# attempted = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) - -# __tablename__ = "utility_log" -# __table_args__ = {"schema": "augur_data"} - - -# # TODO: Add foreign key to repo table - - -# class WorkingCommits(Base): -# working_commits_id = Column(BigInteger, primary_key=True) -# repos_id = Column(Integer, nullable=False) -# working_commit = Column(String(), server_default="NULL") - -# __tablename__ = "working_commits" -# __table_args__ = {"schema": "augur_data"} - - -# # class WorkingCommits(Base): -# # working_commits_id = Column(BigInteger) -# # repos_id = Column(Integer, nullable=False) -# # working_commit = Column(String()) - -# # __tablename__ = 'working_commits' -# # __table_args__ = ( -# # PrimaryKeyConstraint('working_commits_id'), -# # {"schema":"augur_operations"} -# # ) diff --git a/augur/application/db/models/augur_operations_old.py b/augur/application/db/models/augur_operations_old.py deleted file mode 100644 index 898b6f77f2..0000000000 --- a/augur/application/db/models/augur_operations_old.py +++ /dev/null @@ -1,123 +0,0 @@ -# from augur.application.db.models.base import Base -# from sqlalchemy import ( -# Index, -# Column, -# Integer, -# String, -# UniqueConstraint, -# BigInteger, -# TIMESTAMP, -# PrimaryKeyConstraint, -# func, -# text, -# ) - -# # Start of Augur Operations tablespoon -# class All(Base): -# all_id = Column(BigInteger, primary_key=True) -# Name = Column(String()) -# Bytes = Column(String()) -# Lines = Column(String()) -# Code = Column(String()) -# Comment = Column(String()) -# Blank = Column(String()) -# Complexity = Column(String()) -# Count = Column(String()) -# WeightedComplexity = Column(String()) -# Files = Column(String()) - -# __tablename__ = "all" -# __table_args__ = {"schema": "augur_operations"} - - -# class AugurSettings(Base): -# id = Column(BigInteger) -# setting = Column(String()) -# value = Column(String()) -# last_modified = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "augur_settings" -# __table_args__ = ( -# PrimaryKeyConstraint("id"), -# UniqueConstraint("setting", name="setting-unique"), -# {"schema": "augur_operations"}, -# ) - - -# class ReposFetchLog(Base): -# repos_fetch_log_id = Column(BigInteger) -# repos_id = Column(Integer, nullable=False) -# status = Column(String(), nullable=False) -# date = Column(TIMESTAMP(), nullable=False, server_default=func.current_timestamp()) - -# __tablename__ = "repos_fetch_log" -# __table_args__ = ( -# PrimaryKeyConstraint("repos_fetch_log_id"), -# Index("repos_id,statusops", repos_id, status), -# {"schema": "augur_operations"}, -# ) - - -# # TODO: Add foreign key to Repo table -# class WorkerHistory(Base): -# history_id = Column(BigInteger) -# repo_id = Column(BigInteger) -# worker = Column(String(), nullable=False) -# job_model = Column(String(), nullable=False) -# oauth_id = Column(Integer) -# timestamp = Column(TIMESTAMP(), nullable=False) -# status = Column(String(), nullable=False) -# total_results = Column(Integer) - -# __tablename__ = "worker_history" -# __table_args__ = ( -# 
PrimaryKeyConstraint("history_id", name="history_pkey"), -# {"schema": "augur_operations"}, -# ) - - -# class WorkerJob(Base): -# job_model = Column(String()) -# state = Column(Integer, nullable=False, server_default=text("0")) -# zombie_head = Column(Integer) -# since_id_str = Column(String(), nullable=False, server_default="0") -# description = Column(String(), server_default="None") -# last_count = Column(Integer) -# last_run = Column(TIMESTAMP()) -# analysis_state = Column(Integer, server_default=text("0")) -# oauth_id = Column(Integer, nullable=False) - -# __tablename__ = "worker_job" -# __table_args__ = ( -# PrimaryKeyConstraint("job_model", name="job_pkey"), -# {"schema": "augur_operations"}, -# ) - - -# class WorkerOauth(Base): -# oauth_id = Column(BigInteger) -# name = Column(String(), nullable=False) -# consumer_key = Column(String(), nullable=False) -# consumer_secret = Column(String(), nullable=False) -# access_token = Column(String(), nullable=False) -# access_token_secret = Column(String(), nullable=False) -# repo_directory = Column(String()) -# platform = Column(String(), server_default="github") - -# __tablename__ = "worker_oauth" -# __table_args__ = (PrimaryKeyConstraint("oauth_id"), {"schema": "augur_operations"}) - - -# class WorkerSettingsFacade(Base): -# id = Column(Integer) -# setting = Column(String(), nullable=False) -# value = Column(String(), nullable=False) -# last_modified = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) - -# __tablename__ = "worker_settings_facade" -# __table_args__ = ( -# PrimaryKeyConstraint("id", name="settings_pkey"), -# {"schema": "augur_operations"}, -# ) diff --git a/augur/application/db/models/spdx_old.py b/augur/application/db/models/spdx_old.py deleted file mode 100644 index 7c5ffdcb5c..0000000000 --- a/augur/application/db/models/spdx_old.py +++ /dev/null @@ -1,525 +0,0 @@ -# from augur.application.db.models.base import Base -# from sqlalchemy import ( -# Column, -# Integer, -# String, -# UniqueConstraint, -# PrimaryKeyConstraint, -# ForeignKey, -# Text, -# Boolean, -# TIMESTAMP, -# JSON, -# ) - - -# class AnnotationTypes(Base): -# annotation_type_id = Column(Integer, primary_key=True) -# name = Column(String(), nullable=False) - -# __tablename__ = "annotation_types" -# __table_args__ = ( -# UniqueConstraint("name", name="uc_annotation_type_name"), -# {"schema": "spdx"}, -# ) - - -# class Annotations(Base): -# annotation_id = Column(Integer, primary_key=True, nullable=False) -# document_id = Column( -# Integer, -# ForeignKey("spdx.documents.document_id", name="annotations_document_id_fkey"), -# nullable=False, -# ) -# annotation_type_id = Column( -# Integer, -# ForeignKey( -# "spdx.annotation_types.annotation_type_id", -# name="annotations_annotation_type_id_fkey", -# ), -# nullable=False, -# ) -# identifier_id = Column( -# Integer, -# ForeignKey( -# "spdx.identifiers.identifier_id", name="annotations_identifier_id_fkey" -# ), -# nullable=False, -# ) -# creator_id = Column( -# Integer, -# ForeignKey("spdx.creators.creator_id", name="annotations_creator_id_fkey"), -# nullable=False, -# ) -# created_ts = Column(TIMESTAMP(timezone=True)) -# comment = Column(Text(), nullable=False) - -# __tablename__ = "annotations" -# __table_args__ = {"schema": "spdx"} - - -# class AugurRepoMap(Base): -# map_id = Column(Integer, primary_key=True, nullable=False) -# dosocs_pkg_id = Column(Integer) -# dosocs_pkg_name = Column(Text()) -# repo_id = Column(Integer) -# repo_path = Column(Text()) - -# __tablename__ = 
"augur_repo_map" -# __table_args__ = {"schema": "spdx"} - - -# class CreatorTypes(Base): -# creator_type_id = Column(Integer, primary_key=True, nullable=False) -# name = Column(String(), nullable=False) - -# __tablename__ = "creator_types" -# __table_args__ = {"schema": "spdx"} - - -# class Creators(Base): -# creator_id = Column(Integer, primary_key=True, nullable=False) -# creator_type_id = Column( -# Integer, -# ForeignKey( -# "spdx.creator_types.creator_type_id", name="creators_creator_type_id_fkey" -# ), -# nullable=False, -# ) -# name = Column(String(), nullable=False) -# email = Column(String(), nullable=False) - -# __tablename__ = "creators" -# __table_args__ = {"schema": "spdx"} - - -# class DocumentNamespaces(Base): -# document_namespace_id = Column(Integer, primary_key=True, nullable=False) -# uri = Column(String(), nullable=False) - -# __tablename__ = "document_namespaces" -# __table_args__ = ( -# UniqueConstraint("uri", name="uc_document_namespace_uri"), -# {"schema": "spdx"}, -# ) - - -# class Documents(Base): -# document_id = Column(Integer, primary_key=True, nullable=False) -# document_namespace_id = Column( -# Integer, -# ForeignKey( -# "spdx.document_namespaces.document_namespace_id", -# name="documents_document_namespace_id_fkey", -# ), -# nullable=False, -# ) -# data_license_id = Column( -# Integer, -# ForeignKey("spdx.licenses.license_id", name="documents_data_license_id_fkey"), -# nullable=False, -# ) -# spdx_version = Column(String(), nullable=False) -# name = Column(String(), nullable=False) -# license_list_version = Column(String(), nullable=False) -# created_ts = Column(TIMESTAMP(timezone=True), nullable=False) -# creator_comment = Column(Text(), nullable=False) -# document_comment = Column(Text(), nullable=False) -# package_id = Column( -# Integer, -# ForeignKey("spdx.packages.package_id", name="documents_package_id_fkey"), -# nullable=False, -# ) - -# __tablename__ = "documents" -# __table_args__ = ( -# UniqueConstraint( -# "document_namespace_id", name="uc_document_document_namespace_id" -# ), -# {"schema": "spdx"}, -# ) - - -# class DocumentsCreators(Base): -# document_creator_id = Column(Integer, primary_key=True, nullable=False) -# document_id = Column( -# Integer, -# ForeignKey( -# "spdx.documents.document_id", name="documents_creators_document_id_fkey" -# ), -# nullable=False, -# ) -# creator_id = Column( -# Integer, -# ForeignKey( -# "spdx.creators.creator_id", name="documents_creators_creator_id_fkey" -# ), -# nullable=False, -# ) - -# __tablename__ = "documents_creators" -# __table_args__ = {"schema": "spdx"} - - -# class ExternalRefs(Base): -# external_ref_id = Column(Integer, primary_key=True, nullable=False) -# document_id = Column( -# Integer, -# ForeignKey("spdx.documents.document_id", name="external_refs_document_id_fkey"), -# nullable=False, -# ) -# document_namespace_id = Column( -# Integer, -# ForeignKey( -# "spdx.document_namespaces.document_namespace_id", -# name="external_refs_document_namespace_id_fkey", -# ), -# nullable=False, -# ) -# id_string = Column(String(), nullable=False) -# sha256 = Column(String(), nullable=False) - -# __tablename__ = "external_refs" -# __table_args__ = ( -# UniqueConstraint( -# "document_id", "id_string", name="uc_external_ref_document_id_string" -# ), -# {"schema": "spdx"}, -# ) - - -# class FileContributors(Base): -# file_contributor_id = Column(Integer, primary_key=True, nullable=False) -# file_id = Column( -# Integer, -# ForeignKey("spdx.files.file_id", name="file_contributors_file_id_fkey"), -# 
nullable=False, -# ) -# contributor = Column(Text(), nullable=False) - -# __tablename__ = "file_contributors" -# __table_args__ = {"schema": "spdx"} - - -# class FileTypes(Base): -# file_type_id = Column(Integer) -# name = Column(String(), nullable=False) - -# __tablename__ = "file_types" -# __table_args__ = ( -# PrimaryKeyConstraint("name", name="uc_file_type_name"), -# {"schema": "spdx"}, -# ) - - -# class Files(Base): -# file_id = Column(Integer, primary_key=True, nullable=False) -# file_type_id = Column(Integer) -# sha256 = Column(String(), nullable=False) -# copyright_text = Column(Text()) -# package_id = Column(Integer) -# comment = Column(Text(), nullable=False) -# notice = Column(Text(), nullable=False) - -# __tablename__ = "files" -# __table_args__ = ( -# UniqueConstraint("sha256", name="uc_file_sha256"), -# {"schema": "spdx"}, -# ) - - -# class FilesLicenses(Base): -# file_license_id = Column(Integer, primary_key=True, nullable=False) -# file_id = Column( -# Integer, -# ForeignKey("spdx.files.file_id", name="files_licenses_file_id_fkey"), -# nullable=False, -# ) -# license_id = Column( -# Integer, -# ForeignKey("spdx.licenses.license_id", name="files_licenses_license_id_fkey"), -# nullable=False, -# ) -# extracted_text = Column(Text(), nullable=False) - -# __tablename__ = "files_licenses" -# __table_args__ = ( -# UniqueConstraint("file_id", "license_id", name="uc_file_license"), -# {"schema": "spdx"}, -# ) - - -# class FilesScans(Base): -# file_scan_id = Column(Integer, primary_key=True, nullable=False) -# file_id = Column( -# Integer, -# ForeignKey("spdx.files.file_id", name="files_scans_file_id_fkey"), -# nullable=False, -# ) -# scanner_id = Column( -# Integer, -# ForeignKey("spdx.scanners.scanner_id", name="files_scans_scanner_id_fkey"), -# nullable=False, -# ) - -# __tablename__ = "files_scans" -# __table_args__ = ( -# UniqueConstraint("file_id", "scanner_id", name="uc_file_scanner_id"), -# {"schema": "spdx"}, -# ) - - -# # TODO: Add check to table - - -# class Identifiers(Base): -# identifier_id = Column(Integer, primary_key=True, nullable=False) -# document_namespace_id = Column( -# Integer, -# ForeignKey( -# "spdx.document_namespaces.document_namespace_id", -# name="identifiers_document_namespace_id_fkey", -# ), -# nullable=False, -# ) -# id_string = Column(String(), nullable=False) -# document_id = Column( -# Integer, -# ForeignKey("spdx.documents.document_id", name="identifiers_document_id_fkey"), -# ) -# package_id = Column( -# Integer, -# ForeignKey("spdx.packages.package_id", name="identifiers_package_id_fkey"), -# ) -# package_file_id = Column( -# Integer, -# ForeignKey( -# "spdx.packages_files.package_file_id", -# name="identifiers_package_file_id_fkey", -# ), -# ) - -# __tablename__ = "identifiers" -# __table_args__ = ( -# UniqueConstraint( -# "document_namespace_id", -# "id_string", -# name="uc_identifier_document_namespace_id", -# ), -# UniqueConstraint( -# "document_namespace_id", -# "document_id", -# name="uc_identifier_namespace_document_id", -# ), -# UniqueConstraint( -# "document_namespace_id", -# "package_id", -# name="uc_identifier_namespace_package_id", -# ), -# UniqueConstraint( -# "document_namespace_id", -# "package_file_id", -# name="uc_identifier_namespace_package_file_id", -# ), -# {"schema": "spdx"}, -# ) - - -# class Licenses(Base): -# license_id = Column(Integer, primary_key=True) -# name = Column(String()) -# short_name = Column(String(), nullable=False) -# cross_reference = Column(Text(), nullable=False) -# comment = Column(Text(), 
nullable=False) -# is_spdx_official = Column(Boolean(), nullable=False) - -# __tablename__ = "licenses" -# __table_args__ = ( -# UniqueConstraint("short_name", name="uc_license_short_name"), -# {"schema": "spdx"}, -# ) - - -# # TODO: Need to a check - - -# class Packages(Base): -# package_id = Column(Integer, primary_key=True) -# name = Column(String(), nullable=False) -# version = Column(String(), nullable=False) -# file_name = Column(Text(), nullable=False) -# supplier_id = Column( -# Integer, -# ForeignKey("spdx.creators.creator_id", name="packages_supplier_id_fkey"), -# ) -# originator_id = Column( -# Integer, -# ForeignKey("spdx.creators.creator_id", name="packages_originator_id_fkey"), -# ) -# download_location = Column(Text()) -# verification_code = Column(String(), nullable=False) -# ver_code_excluded_file_id = Column( -# Integer, -# ForeignKey( -# "spdx.packages_files.package_file_id", name="fk_package_packages_files" -# ), -# ) -# sha256 = Column(String()) -# home_page = Column(Text()) -# source_info = Column(Text(), nullable=False) -# concluded_license_id = Column( -# Integer, -# ForeignKey( -# "spdx.licenses.license_id", name="packages_concluded_license_id_fkey" -# ), -# ) -# declared_license_id = Column( -# Integer, -# ForeignKey( -# "spdx.licenses.license_id", name="packages_declared_license_id_fkey" -# ), -# ) -# license_comment = Column(Text(), nullable=False) -# copyright_text = Column(Text()) -# summary = Column(Text(), nullable=False) -# description = Column(Text(), nullable=False) -# comment = Column(Text(), nullable=False) -# dosocs2_dir_code = Column(String()) - -# __tablename__ = "packages" -# __table_args__ = ( -# UniqueConstraint("sha256", name="uc_package_sha256"), -# UniqueConstraint( -# "verification_code", "dosocs2_dir_code", name="uc_dir_code_ver_code" -# ), -# {"schema": "spdx"}, -# ) - - -# class PackagesFiles(Base): -# package_file_id = Column(Integer, primary_key=True) -# package_id = Column( -# Integer, -# ForeignKey("spdx.packages.package_id", name="fk_package_files_packages"), -# nullable=False, -# ) -# file_id = Column( -# Integer, -# ForeignKey("spdx.files.file_id", name="packages_files_file_id_fkey"), -# nullable=False, -# ) -# concluded_license_id = Column( -# Integer, -# ForeignKey( -# "spdx.licenses.license_id", name="packages_files_concluded_license_id_fkey" -# ), -# ) -# license_comment = Column(Text(), nullable=False) -# file_name = Column(Text(), nullable=False) - -# __tablename__ = "packages_files" -# __table_args__ = ( -# UniqueConstraint("package_id", "file_name", name="uc_package_id_file_name"), -# {"schema": "spdx"}, -# ) - - -# class PackagesScans(Base): -# package_scan_id = Column(Integer, primary_key=True) -# package_id = Column( -# Integer, -# ForeignKey("spdx.packages.package_id", name="packages_scans_package_id_fkey"), -# nullable=False, -# ) -# scanner_id = Column( -# Integer, -# ForeignKey("spdx.scanners.scanner_id", name="packages_scans_scanner_id_fkey"), -# nullable=False, -# ) - -# __tablename__ = "packages_scans" -# __table_args__ = ( -# UniqueConstraint("package_id", "scanner_id", name="uc_package_scanner_id"), -# {"schema": "spdx"}, -# ) - - -# class Projects(Base): -# package_id = Column(Integer, primary_key=True) -# name = Column(Text(), nullable=False) -# homepage = Column(Text(), nullable=False) -# uri = Column(Text(), nullable=False) - -# __tablename__ = "projects" -# __table_args__ = {"schema": "spdx"} - - -# class RelationshipTypes(Base): -# relationship_type_id = Column(Integer, primary_key=True) -# name = 
Column(String(), nullable=False) - -# __tablename__ = "relationship_types" -# __table_args__ = ( -# UniqueConstraint("name", name="uc_relationship_type_name"), -# {"schema": "spdx"}, -# ) - - -# class Relationships(Base): -# relationship_id = Column(Integer, primary_key=True) -# left_identifier_id = Column( -# Integer, -# ForeignKey( -# "spdx.identifiers.identifier_id", -# name="relationships_left_identifier_id_fkey", -# ), -# nullable=False, -# ) -# right_identifier_id = Column( -# Integer, -# ForeignKey( -# "spdx.identifiers.identifier_id", -# name="relationships_right_identifier_id_fkey", -# ), -# nullable=False, -# ) -# relationship_type_id = Column( -# Integer, -# ForeignKey( -# "spdx.relationship_types.relationship_type_id", -# name="relationships_relationship_type_id_fkey", -# ), -# nullable=False, -# ) -# relationship_comment = Column(Text(), nullable=False) - -# __tablename__ = "relationships" -# __table_args__ = ( -# UniqueConstraint( -# "left_identifier_id", -# "right_identifier_id", -# "relationship_type_id", -# name="uc_left_right_relationship_type", -# ), -# {"schema": "spdx"}, -# ) - - -# class SbomScans(Base): -# sbom_scan_id = Column(Integer, primary_key=True) -# repo_id = Column(Integer) -# sbom_scan = Column(JSON()) - -# __tablename__ = "sbom_scans" -# __table_args__ = {"schema": "spdx"} - - -# class Scanners(Base): -# scanner_id = Column(Integer, primary_key=True) -# name = Column(String(), nullable=False) - -# __tablename__ = "scanners" -# __table_args__ = ( -# UniqueConstraint("name", name="uc_scanner_name"), -# {"schema": "spdx"}, -# ) From c39b9f2908319047b819bf331ea5604ccb945451 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 3 Dec 2025 14:03:01 -0500 Subject: [PATCH 047/104] specify `pr_review_body` as a User generated content string field for cleaning Signed-off-by: Adrian Edwards --- augur/tasks/github/pull_requests/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 812a4eef25..40f56e0ee5 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -403,7 +403,8 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") pr_review_natural_keys = ["pr_review_src_id",] - augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) + pr_review_string_fields = ["pr_review_body",] + augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys, string_fields=pr_review_string_fields) From 1a52bf5054d56b570ce1e5bda077332640f9d527 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 3 Dec 2025 14:25:04 -0500 Subject: [PATCH 048/104] Update augur/tasks/git/util/facade_worker/facade_worker/config.py Co-authored-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Signed-off-by: Shlok Gilda --- augur/tasks/git/util/facade_worker/facade_worker/config.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index 09f3c9d6ca..85097d6d54 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -298,10 +298,7 @@ def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False, result = subprocess.run(cmd, **run_options) # Return 
appropriate output based on capture_output flag - if capture_output: - return result.returncode, result.stdout.strip() - else: - return result.returncode, '' + return result.returncode, (result.stdout.strip() if capture_output else '') except subprocess.TimeoutExpired: self.log_activity('Error', f'Git operation timed out: {operation_description}') return -1, '' From 4d10fc13b18eee2c4a939741dc2b2920dc220672 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 3 Dec 2025 19:09:10 -0500 Subject: [PATCH 049/104] Implement batched processing for pull request reviews and contributors Signed-off-by: Shlok Gilda --- augur/tasks/github/pull_requests/tasks.py | 135 ++++++++++++++-------- 1 file changed, 90 insertions(+), 45 deletions(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 812a4eef25..2468b663d4 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -327,9 +327,60 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - +def _flush_pr_review_batch(augur_db, contributors: list, pr_reviews: list, logger, owner: str, repo: str) -> None: + """ + Insert accumulated PR review batch data into the database. + + Handles contributor deduplication before insertion and bulk inserts both + contributors and PR reviews. Uses ON CONFLICT upsert logic via insert_data(). + + Args: + augur_db: DatabaseSession instance for database operations. + contributors: List of contributor dicts to insert. Will be deduplicated + using remove_duplicate_dicts() before insertion. + pr_reviews: List of PR review dicts to insert. + logger: Logger instance for status messages. + owner: Repository owner (for log messages). + repo: Repository name (for log messages). + + Returns: + None. Lists are NOT cleared by this function - caller must clear them. + """ + if contributors: + # Remove duplicates within the batch before inserting + unique_contributors = remove_duplicate_dicts(contributors) + logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(unique_contributors)} contributors") + augur_db.insert_data(unique_contributors, Contributor, ["cntrb_id"]) + + if pr_reviews: + logger.info(f"{owner}/{repo}: Inserting {len(pr_reviews)} pr reviews") + pr_review_natural_keys = ["pr_review_src_id"] + augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) + + @celery.task(base=AugurSecondaryRepoCollectionTask) def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: + """ + Collect pull request reviews for a repository from the GitHub API. + Fetches reviews for each PR and inserts them into the database along with + their associated contributors. Uses batched processing to limit memory + usage - processes reviews in batches of ~1000 instead of accumulating all + reviews in memory before insertion. + + Args: + repo_git: The repository's git URL (e.g., 'https://github.com/owner/repo'). + full_collection: If True, collects reviews for all PRs. If False, only + collects reviews for PRs updated since the last secondary collection. + + Returns: + None. Data is inserted directly into the database. + + Note: + - Inherits error handling from AugurSecondaryRepoCollectionTask base class. + - Contributors are deduplicated within each batch before insertion. + - Uses ON CONFLICT upsert logic to handle duplicate reviews gracefully. 
+ """ logger = logging.getLogger(collect_pull_request_reviews.__name__) owner, repo = get_owner_repo(repo_git) @@ -338,7 +389,6 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: tool_source = "pull_request_reviews" data_source = "Github API" - repo_id = get_repo_by_repo_git(repo_git).repo_id with GithubTaskManifest(logger) as manifest: augur_db = manifest.augur_db @@ -347,7 +397,6 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: repo_id = execute_session_query(query, 'one').repo_id if full_collection: - query = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).order_by(PullRequest.pr_src_number) prs = execute_session_query(query, 'all') else: @@ -355,66 +404,62 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: prs = get_updated_prs(repo_id, last_collected) pr_count = len(prs) + if pr_count == 0: + logger.debug(f"{owner}/{repo} No PRs to collect reviews for") + return + + logger.info(f"{owner}/{repo}: Collecting reviews for {pr_count} PRs") github_data_access = GithubDataAccess(manifest.key_auth, logger) - all_pr_reviews = {} - for index, pr in enumerate(prs): + # Batch processing: accumulate reviews until batch size reached, then flush + REVIEW_BATCH_SIZE = 1000 + contributors = [] + pr_review_dicts = [] + total_reviews_collected = 0 + for index, pr in enumerate(prs): pr_number = pr.pr_src_number pull_request_id = pr.pull_request_id - logger.debug(f"{owner}/{repo} Collecting Pr Reviews for pr {index + 1} of {pr_count}") + # Log progress every 100 PRs + if index % 100 == 0: + logger.debug(f"{owner}/{repo} Processing PR {index + 1} of {pr_count}") pr_review_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}/reviews" try: pr_reviews = list(github_data_access.paginate_resource(pr_review_url)) except UrlNotFoundException as e: - logger.warning(e) + logger.warning(f"{owner}/{repo} PR #{pr_number}: {e}") continue - if pr_reviews: - all_pr_reviews[pull_request_id] = pr_reviews - - if not list(all_pr_reviews.keys()): - logger.debug(f"{owner}/{repo} No pr reviews for repo") - return - - contributors = [] - for pull_request_id, reviews in all_pr_reviews.items(): - - for review in reviews: + # Single-pass extraction: get both contributor and review data together + for review in pr_reviews: + # Extract contributor contributor = process_pull_request_review_contributor(review, tool_source, tool_version, data_source) if contributor: contributors.append(contributor) - logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") - augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) - - - pr_reviews = [] - for pull_request_id, reviews in all_pr_reviews.items(): - - for review in reviews: - + # Extract review data (only if contributor was successfully linked) if "cntrb_id" in review: - pr_reviews.append(extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_source, tool_version)) - - logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") - pr_review_natural_keys = ["pr_review_src_id",] - augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) - - - - - - - - - - - - - - + pr_review_dicts.append( + extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_version, data_source) + ) + + # Flush batch when threshold reached + if len(pr_review_dicts) >= REVIEW_BATCH_SIZE: + _flush_pr_review_batch(augur_db, contributors, pr_review_dicts, 
logger, owner, repo) + total_reviews_collected += len(pr_review_dicts) + contributors.clear() + pr_review_dicts.clear() + + # Flush any remaining data + if pr_review_dicts: + _flush_pr_review_batch(augur_db, contributors, pr_review_dicts, logger, owner, repo) + total_reviews_collected += len(pr_review_dicts) + + if total_reviews_collected == 0: + logger.debug(f"{owner}/{repo} No pr reviews found for repo") + else: + logger.info(f"{owner}/{repo}: Completed - collected {total_reviews_collected} reviews total") \ No newline at end of file From 413dc22b6c6d7ae10e254f011da5d15e8713c310 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Tue, 9 Dec 2025 14:18:42 -0500 Subject: [PATCH 050/104] Implement batched processing for collecting pull request review comments and contributors Signed-off-by: Shlok Gilda --- augur/tasks/github/pull_requests/tasks.py | 263 ++++++++++++++-------- 1 file changed, 167 insertions(+), 96 deletions(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 2468b663d4..8fbe5f4951 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -11,12 +11,12 @@ from augur.application.db.models import PullRequest, Message, PullRequestReview, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, PullRequestReviewMessageRef, Contributor, Repo from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from augur.application.db.lib import get_session, get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors +from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors from augur.application.db.util import execute_session_query from ..messages import process_github_comment_contributors from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_core_data_last_collected -from typing import Generator, List, Dict +from typing import List platform_id = 1 @@ -182,30 +182,6 @@ def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): pr_metadata_natural_keys, string_fields=pr_metadata_string_fields) - - - - - - - - - - - - - - - - - - - - - - - - def process_pull_request_review_contributor(pr_review: dict, tool_source: str, tool_version: str, data_source: str): # get contributor data and set pr cntrb_id @@ -220,7 +196,27 @@ def process_pull_request_review_contributor(pr_review: dict, tool_source: str, t @celery.task(base=AugurSecondaryRepoCollectionTask) def collect_pull_request_review_comments(repo_git: str, full_collection: bool) -> None: + """ + Collect pull request review comments for a repository from the GitHub API. + Fetches review comments and inserts them into the database along with + their associated contributors. Uses batched processing to limit memory + usage - processes comments in batches of ~1000 instead of accumulating all + comments in memory before insertion. + + Args: + repo_git: The repository's git URL (e.g., 'https://github.com/owner/repo'). + full_collection: If True, collects all review comments. If False, only + collects comments created since the last secondary collection. + + Returns: + None. Data is inserted directly into the database. + + Note: + - Inherits error handling from AugurSecondaryRepoCollectionTask base class. 
+ - Contributors are deduplicated within each batch before insertion. + - Uses ON CONFLICT upsert logic to handle duplicate messages gracefully. + """ owner, repo = get_owner_repo(repo_git) review_msg_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/comments" @@ -232,9 +228,9 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - if not full_collection: last_collected_date = get_secondary_data_last_collected(repo_id) - + if last_collected_date: - # subtract 2 days to ensure all data is collected + # Subtract 2 days to ensure all data is collected core_data_last_collected = (last_collected_date - timedelta(days=2)).replace(tzinfo=timezone.utc) review_msg_url += f"?since={core_data_last_collected.isoformat()}" else: @@ -242,11 +238,8 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - pr_reviews = get_pull_request_reviews_by_repo_id(repo_id) - # maps the github pr_review id to the auto incrementing pk that augur stores as pr_review id - pr_review_id_mapping = {} - for review in pr_reviews: - pr_review_id_mapping[review.pr_review_src_id] = review.pr_review_id - + # Build mapping once: github pr_review_src_id -> augur pr_review_id + pr_review_id_mapping = {review.pr_review_src_id: review.pr_review_id for review in pr_reviews} tool_source = "Pr review comment task" tool_version = "2.0" @@ -255,52 +248,156 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - key_auth = GithubRandomKeyAuth(logger) github_data_access = GithubDataAccess(key_auth, logger) - all_raw_pr_review_messages = list(github_data_access.paginate_resource(review_msg_url)) - + # Batch processing: accumulate comments until batch size reached, then flush + COMMENT_BATCH_SIZE = 1000 contributors = [] - for comment in all_raw_pr_review_messages: - + pr_review_comment_dicts = [] + pr_review_msg_mapping_data = {} + total_refs_inserted = 0 + + # Single-pass extraction: get both contributor and comment data together + for comment in github_data_access.paginate_resource(review_msg_url): + # Extract contributor _, contributor = process_github_comment_contributors(comment, tool_source, tool_version, data_source) if contributor is not None: contributors.append(contributor) - logger.info(f"{owner}/{repo} Pr review messages: Inserting {len(contributors)} contributors") - batch_insert_contributors(logger, contributors) + # Extract message data (only if it has a pr review id) + if comment.get("pull_request_review_id"): + pr_review_comment_dicts.append( + extract_needed_message_data(comment, platform_id, repo_id, tool_source, tool_version, data_source) + ) + # Map github message id to raw comment data for later ref creation + pr_review_msg_mapping_data[comment["id"]] = comment + + # Flush batch when threshold reached (check both to prevent unbounded growth) + if len(pr_review_comment_dicts) >= COMMENT_BATCH_SIZE or len(contributors) >= COMMENT_BATCH_SIZE: + refs_inserted = _flush_pr_review_comment_batch( + logger, contributors, pr_review_comment_dicts, pr_review_msg_mapping_data, + pr_review_id_mapping, repo_id, tool_version, data_source, owner, repo + ) + total_refs_inserted += refs_inserted + contributors.clear() + pr_review_comment_dicts.clear() + pr_review_msg_mapping_data.clear() + + # Flush any remaining data + if pr_review_comment_dicts: + refs_inserted = _flush_pr_review_comment_batch( + logger, contributors, pr_review_comment_dicts, pr_review_msg_mapping_data, + pr_review_id_mapping, repo_id, tool_version, data_source, owner, repo + ) + 
total_refs_inserted += refs_inserted + if total_refs_inserted == 0: + logger.debug(f"{owner}/{repo} No pr review comments found for repo") + else: + logger.info(f"{owner}/{repo}: Completed - collected {total_refs_inserted} pr review comment refs total") - pr_review_comment_dicts = [] - pr_review_msg_mapping_data = {} - pr_review_comments_len = len(all_raw_pr_review_messages) - for comment in all_raw_pr_review_messages: +def _flush_contributors(logger, contributors: list, owner: str, repo: str, context: str) -> None: + """ + Deduplicate and insert contributors for a batch. - # pull_request_review_id is required to map it to the correct pr review - if not comment["pull_request_review_id"]: - continue + Shared helper used by both PR review and PR review comment flush functions. + Handles deduplication via remove_duplicate_dicts() and bulk insert via + batch_insert_contributors(). - pr_review_comment_dicts.append( - extract_needed_message_data(comment, platform_id, repo_id, tool_source, tool_version, data_source) - ) + Args: + logger: Logger instance for status messages. + contributors: List of contributor dicts to insert. + owner: Repository owner (for log messages). + repo: Repository name (for log messages). + context: Description of what's being processed (e.g., "PR reviews", "PR review comments"). + """ + if contributors: + unique_contributors = remove_duplicate_dicts(contributors) + logger.info(f"{owner}/{repo} {context}: Inserting {len(unique_contributors)} contributors") + batch_insert_contributors(logger, unique_contributors) - # map github message id to the data that maps it to the pr review - github_msg_id = comment["id"] - pr_review_msg_mapping_data[github_msg_id] = comment +def _flush_pr_review_batch(augur_db, contributors: list, pr_reviews: list, logger, owner: str, repo: str) -> None: + """ + Insert accumulated PR review batch data into the database. + Handles contributor deduplication before insertion and bulk inserts both + contributors and PR reviews. Uses ON CONFLICT upsert logic via insert_data(). - logger.info(f"Inserting {len(pr_review_comment_dicts)} pr review comments") + Args: + augur_db: DatabaseSession instance for database operations. + contributors: List of contributor dicts to insert. Will be deduplicated + using remove_duplicate_dicts() before insertion. + pr_reviews: List of PR review dicts to insert. + logger: Logger instance for status messages. + owner: Repository owner (for log messages). + repo: Repository name (for log messages). + + Returns: + None. Lists are NOT cleared by this function - caller must clear them. + """ + _flush_contributors(logger, contributors, owner, repo, "PR reviews") + + if pr_reviews: + logger.info(f"{owner}/{repo}: Inserting {len(pr_reviews)} pr reviews") + pr_review_natural_keys = ["pr_review_src_id"] + augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) + + +def _flush_pr_review_comment_batch( + logger, + contributors: list, + pr_review_comment_dicts: list, + pr_review_msg_mapping_data: dict, + pr_review_id_mapping: dict, + repo_id: int, + tool_version: str, + data_source: str, + owner: str, + repo: str +) -> int: + """ + Insert accumulated PR review comment batch data into the database. + + Handles contributor deduplication before insertion, bulk inserts both + contributors and messages, then creates the message-to-review reference links. + Uses ON CONFLICT upsert logic via bulk_insert_dicts(). + + Args: + logger: Logger instance for status messages. + contributors: List of contributor dicts to insert. 
Will be deduplicated + using remove_duplicate_dicts() before insertion. + pr_review_comment_dicts: List of message dicts to insert into Message table. + pr_review_msg_mapping_data: Dict mapping github_msg_id to raw comment data + (needed for creating review refs after message insert). + pr_review_id_mapping: Dict mapping github pr_review_src_id to augur pr_review_id. + repo_id: The repository ID. + tool_version: Tool version string for metadata. + data_source: Data source string for metadata. + owner: Repository owner (for log messages). + repo: Repository name (for log messages). + + Returns: + Number of PR review message refs successfully inserted. + """ + _flush_contributors(logger, contributors, owner, repo, "PR review comments") + + if not pr_review_comment_dicts: + return 0 + + logger.info(f"{owner}/{repo}: Inserting {len(pr_review_comment_dicts)} pr review comments") message_natural_keys = ["platform_msg_id", "pltfrm_id"] message_return_columns = ["msg_id", "platform_msg_id"] message_string_fields = ["msg_text"] - message_return_data = bulk_insert_dicts(logger, pr_review_comment_dicts, Message, message_natural_keys, - return_columns=message_return_columns, string_fields=message_string_fields) - if message_return_data is None: - return + message_return_data = bulk_insert_dicts( + logger, pr_review_comment_dicts, Message, message_natural_keys, + return_columns=message_return_columns, string_fields=message_string_fields + ) + if message_return_data is None: + return 0 pr_review_message_ref_insert_data = [] for data in message_return_data: - augur_msg_id = data["msg_id"] github_msg_id = data["platform_msg_id"] @@ -315,47 +412,21 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - logger.warning(f"{owner}/{repo}: Could not find related pr review. We were searching for pr review with id: {github_pr_review_id}") continue - pr_review_message_ref = extract_pr_review_message_ref_data(comment, augur_pr_review_id, github_pr_review_id, repo_id, tool_version, data_source) + pr_review_message_ref = extract_pr_review_message_ref_data( + comment, augur_pr_review_id, github_pr_review_id, repo_id, tool_version, data_source + ) pr_review_message_ref_insert_data.append(pr_review_message_ref) + if pr_review_message_ref_insert_data: + logger.info(f"{owner}/{repo}: Inserting {len(pr_review_message_ref_insert_data)} pr review refs") + pr_comment_ref_natural_keys = ["pr_review_msg_src_id"] + pr_review_msg_ref_string_columns = ["pr_review_msg_diff_hunk"] + bulk_insert_dicts( + logger, pr_review_message_ref_insert_data, PullRequestReviewMessageRef, + pr_comment_ref_natural_keys, string_fields=pr_review_msg_ref_string_columns + ) - logger.info(f"Inserting {len(pr_review_message_ref_insert_data)} pr review refs") - pr_comment_ref_natural_keys = ["pr_review_msg_src_id"] - pr_review_msg_ref_string_columns = ["pr_review_msg_diff_hunk"] - bulk_insert_dicts(logger, pr_review_message_ref_insert_data, PullRequestReviewMessageRef, pr_comment_ref_natural_keys, string_fields=pr_review_msg_ref_string_columns) - - - - -def _flush_pr_review_batch(augur_db, contributors: list, pr_reviews: list, logger, owner: str, repo: str) -> None: - """ - Insert accumulated PR review batch data into the database. - - Handles contributor deduplication before insertion and bulk inserts both - contributors and PR reviews. Uses ON CONFLICT upsert logic via insert_data(). - - Args: - augur_db: DatabaseSession instance for database operations. - contributors: List of contributor dicts to insert. 
Will be deduplicated - using remove_duplicate_dicts() before insertion. - pr_reviews: List of PR review dicts to insert. - logger: Logger instance for status messages. - owner: Repository owner (for log messages). - repo: Repository name (for log messages). - - Returns: - None. Lists are NOT cleared by this function - caller must clear them. - """ - if contributors: - # Remove duplicates within the batch before inserting - unique_contributors = remove_duplicate_dicts(contributors) - logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(unique_contributors)} contributors") - augur_db.insert_data(unique_contributors, Contributor, ["cntrb_id"]) - - if pr_reviews: - logger.info(f"{owner}/{repo}: Inserting {len(pr_reviews)} pr reviews") - pr_review_natural_keys = ["pr_review_src_id"] - augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) + return len(pr_review_message_ref_insert_data) @celery.task(base=AugurSecondaryRepoCollectionTask) From 1acb41bbd3a1979425485bab06b5b17f8fb688e8 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 17:02:15 -0500 Subject: [PATCH 051/104] formatting: move table name and schema attributes up top for consistency Signed-off-by: Adrian Edwards --- .../application/db/models/augur_operations.py | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 45ac1d8167..100cfd24dc 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -217,6 +217,10 @@ class WorkerSettingsFacade(Base): ) class BadgingDEI(Base): + __tablename__ = 'dei_badging' + __table_args__ = ( + {"schema": "augur_data"} + ) id = Column(Integer, primary_key=True, nullable=False) badging_id = Column(Integer, nullable=False) level = Column(String, nullable=False) @@ -227,27 +231,31 @@ class BadgingDEI(Base): repo = relationship("Repo") - __tablename__ = 'dei_badging' + +class Config(Base): + __tablename__ = 'config' __table_args__ = ( - {"schema": "augur_data"} + UniqueConstraint('section_name', "setting_name", name='unique-config-setting'), + {"schema": "augur_operations"} ) - -class Config(Base): id = Column(SmallInteger, primary_key=True, nullable=False) section_name = Column(String, nullable=False) setting_name = Column(String, nullable=False) value = Column(String) type = Column(String) - __tablename__ = 'config' - __table_args__ = ( - UniqueConstraint('section_name', "setting_name", name='unique-config-setting'), - {"schema": "augur_operations"} - ) + # add admit column to database class User(Base): + __tablename__ = 'users' + __table_args__ = ( + UniqueConstraint('email', name='user-unique-email'), + UniqueConstraint('login_name', name='user-unique-name'), + UniqueConstraint('text_phone', name='user-unique-phone'), + {"schema": "augur_operations"} + ) user_id = Column(Integer, primary_key=True) login_name = Column(String, nullable=False) @@ -262,13 +270,6 @@ class User(Base): data_source = Column(String) data_collection_date = Column(TIMESTAMP(precision=0), server_default=text("CURRENT_TIMESTAMP")) - __tablename__ = 'users' - __table_args__ = ( - UniqueConstraint('email', name='user-unique-email'), - UniqueConstraint('login_name', name='user-unique-name'), - UniqueConstraint('text_phone', name='user-unique-phone'), - {"schema": "augur_operations"} - ) groups = relationship("UserGroup", back_populates="user") tokens = relationship("UserSessionToken", 
back_populates="user") @@ -629,17 +630,19 @@ def compute_hashsed_password(password): class UserGroup(Base): + __tablename__ = 'user_groups' + __table_args__ = ( + UniqueConstraint('user_id', 'name', name='user_group_unique'), + {"schema": "augur_operations"} + ) + group_id = Column(BigInteger, primary_key=True) user_id = Column(Integer, ForeignKey("augur_operations.users.user_id", name="user_group_user_id_fkey") ) name = Column(String, nullable=False) favorited = Column(Boolean, nullable=False, server_default=text("FALSE")) - __tablename__ = 'user_groups' - __table_args__ = ( - UniqueConstraint('user_id', 'name', name='user_group_unique'), - {"schema": "augur_operations"} - ) + user = relationship("User", back_populates="groups") repos = relationship("UserRepo", back_populates="group") From f2929f764402c8176c5af06555d4b0bdaeae06cd Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 14 Nov 2025 16:23:21 -0500 Subject: [PATCH 052/104] table_args formatting Signed-off-by: Adrian Edwards --- augur/application/db/models/augur_data.py | 18 ++++++++---- .../application/db/models/augur_operations.py | 28 ++++--------------- augur/application/db/models/spdx.py | 25 +++++++++++++---- 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 034a2bec01..9f7d8c7fb3 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -1359,7 +1359,8 @@ class Commit(Base): class CommitMessage(Base): __tablename__ = "commit_messages" - __table_args__ = ( UniqueConstraint("repo_id","cmt_hash", name="commit-message-insert-unique"), + __table_args__ = ( + UniqueConstraint("repo_id","cmt_hash", name="commit-message-insert-unique"), { "schema": "augur_data", "comment": "This table holds commit messages", @@ -1930,9 +1931,12 @@ class RepoClusterMessage(Base): class RepoDependency(Base): __tablename__ = "repo_dependencies" - __table_args__ = ( UniqueConstraint("repo_id","dep_name","data_collection_date", name="deps-insert-unique"), - {"schema": "augur_data", - "comment": "Contains the dependencies for a repo.",}, + __table_args__ = ( + UniqueConstraint("repo_id","dep_name","data_collection_date", name="deps-insert-unique"), + { + "schema": "augur_data", + "comment": "Contains the dependencies for a repo." 
+ }, ) repo_dependencies_id = Column( @@ -1960,7 +1964,8 @@ class RepoDependency(Base): class RepoDepsLibyear(Base): __tablename__ = "repo_deps_libyear" - __table_args__ = ( UniqueConstraint("repo_id","name", "data_collection_date", name="deps-libyear-insert-unique"), + __table_args__ = ( + UniqueConstraint("repo_id","name", "data_collection_date", name="deps-libyear-insert-unique"), {"schema": "augur_data"} ) @@ -1993,7 +1998,8 @@ class RepoDepsLibyear(Base): class RepoDepsScorecard(Base): __tablename__ = "repo_deps_scorecard" - __table_args__ = ( UniqueConstraint("repo_id","name", name="deps-scorecard-insert-unique"), + __table_args__ = ( + UniqueConstraint("repo_id","name", name="deps-scorecard-insert-unique"), {"schema": "augur_data"} ) diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 100cfd24dc..12bb94e6cf 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -218,9 +218,7 @@ class WorkerSettingsFacade(Base): class BadgingDEI(Base): __tablename__ = 'dei_badging' - __table_args__ = ( - {"schema": "augur_data"} - ) + __table_args__ = {"schema": "augur_data"} id = Column(Integer, primary_key=True, nullable=False) badging_id = Column(Integer, nullable=False) level = Column(String, nullable=False) @@ -742,11 +740,7 @@ def convert_group_name_to_id(session, user_id: int, group_name: str) -> int: class UserRepo(Base): __tablename__ = "user_repos" - __table_args__ = ( - { - "schema": "augur_operations" - } - ) + __table_args__ = { "schema": "augur_operations" } group_id = Column( ForeignKey("augur_operations.user_groups.group_id", name="user_repo_group_id_fkey"), primary_key=True, nullable=False @@ -1013,11 +1007,7 @@ def add_github_org_repos(session, url: List[str], user_id: int, group_name: int) class UserSessionToken(Base): __tablename__ = "user_session_tokens" - __table_args__ = ( - { - "schema": "augur_operations" - } - ) + __table_args__ = { "schema": "augur_operations" } token = Column(String, primary_key=True, nullable=False) user_id = Column(ForeignKey("augur_operations.users.user_id", name="user_session_token_user_id_fkey")) @@ -1055,11 +1045,7 @@ def delete_refresh_tokens(self, session): class ClientApplication(Base): __tablename__ = "client_applications" - __table_args__ = ( - { - "schema": "augur_operations" - } - ) + __table_args__ = { "schema": "augur_operations" } id = Column(String, primary_key=True, nullable=False) user_id = Column(ForeignKey("augur_operations.users.user_id", name="client_application_user_id_fkey"), nullable=False) @@ -1086,11 +1072,7 @@ def get_by_id(session, client_id): class Subscription(Base): __tablename__ = "subscriptions" - __table_args__ = ( - { - "schema": "augur_operations" - } - ) + __table_args__ = { "schema": "augur_operations" } application_id = Column(ForeignKey("augur_operations.client_applications.id", name="subscriptions_application_id_fkey"), primary_key=True) type_id = Column(ForeignKey("augur_operations.subscription_types.id", name="subscriptions_type_id_fkey"), primary_key=True) diff --git a/augur/application/db/models/spdx.py b/augur/application/db/models/spdx.py index 9e4949cccb..4e981dc54a 100644 --- a/augur/application/db/models/spdx.py +++ b/augur/application/db/models/spdx.py @@ -176,7 +176,10 @@ class SpdxPackage(Base): class SpdxPackagesFile(Base): __tablename__ = "packages_files" - __table_args__ = (UniqueConstraint("package_id", "file_name"), {"schema": "spdx"}) + __table_args__ = ( + 
UniqueConstraint("package_id", "file_name"), + {"schema": "spdx"} + ) package_file_id = Column( Integer, @@ -312,7 +315,10 @@ class SpdxFileContributor(Base): class SpdxFilesLicense(Base): __tablename__ = "files_licenses" - __table_args__ = (UniqueConstraint("file_id", "license_id"), {"schema": "spdx"}) + __table_args__ = ( + UniqueConstraint("file_id", "license_id"), + {"schema": "spdx"} + ) file_license_id = Column( Integer, @@ -331,7 +337,10 @@ class SpdxFilesLicense(Base): class SpdxFilesScan(Base): __tablename__ = "files_scans" - __table_args__ = (UniqueConstraint("file_id", "scanner_id"), {"schema": "spdx"}) + __table_args__ = ( + UniqueConstraint("file_id", "scanner_id"), + {"schema": "spdx"} + ) file_scan_id = Column( Integer, @@ -347,7 +356,10 @@ class SpdxFilesScan(Base): class SpdxPackagesScan(Base): __tablename__ = "packages_scans" - __table_args__ = (UniqueConstraint("package_id", "scanner_id"), {"schema": "spdx"}) + __table_args__ = ( + UniqueConstraint("package_id", "scanner_id"), + {"schema": "spdx"} + ) package_scan_id = Column( Integer, @@ -383,7 +395,10 @@ class SpdxDocumentsCreator(Base): class SpdxExternalRef(Base): __tablename__ = "external_refs" - __table_args__ = (UniqueConstraint("document_id", "id_string"), {"schema": "spdx"}) + __table_args__ = ( + UniqueConstraint("document_id", "id_string"), + {"schema": "spdx"} + ) external_ref_id = Column( Integer, From 79fa279816d0d58dfb0a506fecd7e98022d83eb6 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:05:25 -0500 Subject: [PATCH 053/104] fix: Use list.clear() in facade tasks to reduce memory overhead Signed-off-by: Shlok Gilda --- augur/tasks/git/facade_tasks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index b0d638768f..08594a231a 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -212,7 +212,7 @@ def facade_fetch_missing_commit_messages(repo_git): if len(to_insert) >= 1000: bulk_insert_dicts(logger,to_insert, CommitMessage, ["repo_id","cmt_hash"]) - to_insert = [] + to_insert.clear() to_insert.append(msg_record) except Exception as e: @@ -313,13 +313,14 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: ) if pendingCommitRecordsToInsert: facade_bulk_insert_commits(logger, pendingCommitRecordsToInsert) - pendingCommitRecordsToInsert = [] + pendingCommitRecordsToInsert.clear() if commit_msg: pendingCommitMessageRecordsToInsert.append(commit_msg) if len(pendingCommitMessageRecordsToInsert) >= 1000: bulk_insert_dicts(logger, pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id", "cmt_hash"]) + pendingCommitMessageRecordsToInsert.clear() # FINAL MESSAGE INSERT bulk_insert_dicts(logger, pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id", "cmt_hash"]) From 836544d9dcdc844657a2608e27a58871e646fe87 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:05:46 -0500 Subject: [PATCH 054/104] fix: Process facade contributor results in batches Signed-off-by: Shlok Gilda --- augur/tasks/github/facade_github/tasks.py | 33 +++++++++++++++++++---- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index eff64df6ee..3396de7b64 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -252,7 +252,6 @@ def insert_facade_contributors(self, repo_git): #Execute statement with session. 
result = execute_sql(new_contrib_sql) - new_contribs = [dict(row) for row in result.mappings()] #print(new_contribs) @@ -262,7 +261,20 @@ def insert_facade_contributors(self, repo_git): key_auth = GithubRandomKeyAuth(logger) - process_commit_metadata(logger, key_auth, list(new_contribs), repo_id, platform_id) + # Process results in batches to reduce memory usage + batch = [] + BATCH_SIZE = 1000 + + for row in result.mappings(): + batch.append(dict(row)) + + if len(batch) >= BATCH_SIZE: + process_commit_metadata(logger, key_auth, batch, repo_id, platform_id) + batch.clear() + + # Process remaining items in batch + if batch: + process_commit_metadata(logger, key_auth, batch, repo_id, platform_id) logger.debug("DEBUG: Got through the new_contribs") @@ -300,10 +312,21 @@ def insert_facade_contributors(self, repo_git): result = execute_sql(resolve_email_to_cntrb_id_sql) - existing_cntrb_emails = [dict(row) for row in result.mappings()] - print(existing_cntrb_emails) - link_commits_to_contributor(logger, facade_helper,list(existing_cntrb_emails)) + # Process results in batches to reduce memory usage + batch = [] + BATCH_SIZE = 1000 + + for row in result.mappings(): + batch.append(dict(row)) + + if len(batch) >= BATCH_SIZE: + link_commits_to_contributor(logger, facade_helper, batch) + batch.clear() + + # Process remaining items in batch + if batch: + link_commits_to_contributor(logger, facade_helper, batch) return From 7f502bd94e5acecc9aea315eafff3d764212dca6 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:06:26 -0500 Subject: [PATCH 055/104] fix: Convert issues collection to generator pattern with batching Signed-off-by: Shlok Gilda --- augur/tasks/github/issues.py | 68 ++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index 37bee5c8dd..d100d511bc 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -1,6 +1,6 @@ import logging import traceback -from datetime import timedelta, timezone +from datetime import timedelta, timezone, datetime from sqlalchemy.exc import IntegrityError @@ -20,9 +20,21 @@ development = get_development_flag() @celery.task(base=AugurCoreRepoCollectionTask) -def collect_issues(repo_git : str, full_collection: bool) -> int: +def collect_issues(repo_git: str, full_collection: bool) -> int: + """ + Collect all issues (excluding pull requests) for a repository. - logger = logging.getLogger(collect_issues.__name__) + Retrieves issues from GitHub API in batches of 1000 and inserts them along with + related labels, assignees, and contributors. 
+ + Args: + repo_git: Full git URL (e.g., 'https://github.com/chaoss/augur') + full_collection: True for all historical data, False for incremental (last collection - 2 days) + + Returns: + Number of issues collected, or -1 on error + """ + logger = logging.getLogger(collect_issues.__name__) repo_id = get_repo_by_repo_git(repo_git).repo_id @@ -31,33 +43,60 @@ def collect_issues(repo_git : str, full_collection: bool) -> int: if full_collection: core_data_last_collected = None else: - # subtract 2 days to ensure all data is collected + # Subtract 2 days to ensure all data is collected core_data_last_collected = (get_core_data_last_collected(repo_id) - timedelta(days=2)).replace(tzinfo=timezone.utc) key_auth = GithubRandomKeyAuth(logger) logger.info(f'this is the manifest.key_auth value: {str(key_auth)}') - try: - issue_data = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) + try: + issue_data_generator = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) - if not issue_data: - logger.info(f"{owner}/{repo} has no issues") - return 0 + # Process issues in batches to avoid memory spikes + batch = [] + total_issues = 0 + batch_size = 1000 + + for issue in issue_data_generator: + batch.append(issue) - total_issues = len(issue_data) - process_issues(issue_data, f"{owner}/{repo}: Issue task", repo_id, logger) + if len(batch) >= batch_size: + logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues})") + process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) + total_issues += len(batch) + batch.clear() + + # Process remaining issues in the last batch + if len(batch) > 0: + logger.info(f"{owner}/{repo}: Processing final batch of {len(batch)} issues") + process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) + total_issues += len(batch) + + if total_issues == 0: + logger.info(f"{owner}/{repo} has no issues") return total_issues - + except Exception as e: logger.error(f"Could not collect issues for repo {repo_git}\n Reason: {e} \n Traceback: {''.join(traceback.format_exception(None, e, e.__traceback__))}") return -1 -def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: +def retrieve_all_issue_data(repo_git: str, logger:logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): + """ + Retrieve all issue data for a repository as a generator. + + Returns a generator to avoid materializing all issues in memory at once. + This is critical for repos with 10,000+ issues to prevent memory spikes. + Args: + repo_git (str): The GitHub repository in "owner/repo" format. + logger (logging.Logger): Logger for logging messages. + key_auth (GithubRandomKeyAuth): Auth handler for GitHub API. + since (datetime, optional): Only issues updated since this datetime will be retrieved. 
+ """ owner, repo = get_owner_repo(repo_git) logger.info(f"Collecting issues for {owner}/{repo}") @@ -74,7 +113,8 @@ def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: issues_paginator = github_data_access.paginate_resource(url) - return list(issues_paginator) + # Return the generator directly instead of materializing it + return issues_paginator def process_issues(issues, task_name, repo_id, logger) -> None: From 44e6967d70491a6fdf9148bf93628d63e3793184 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:06:50 -0500 Subject: [PATCH 056/104] fix: Add batch processing to PR commits and files collection Signed-off-by: Shlok Gilda --- .../pull_requests/commits_model/core.py | 14 ++++++--- .../github/pull_requests/files_model/core.py | 14 ++++++--- augur/tasks/github/pull_requests/tasks.py | 29 ++++++------------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index 2df6d66f5d..83b283bb6d 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -43,13 +43,15 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti logger.info(f"Getting pull request commits for repo: {repo.repo_git}") github_data_access = GithubDataAccess(key_auth, logger) - + + BATCH_SIZE = 1000 + pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] all_data = [] for index,pr_info in enumerate(pr_urls): logger.info(f'{task_name}: Querying commits for pull request #{index + 1} of {len(pr_urls)}') commits_url = pr_info['pr_url'] + '/commits?state=all' - + if not pr_info.get('pr_url'): logger.warning(f"{task_name}: No pr_url found for pull request info: {pr_info}. Skipping.") continue @@ -70,13 +72,17 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti 'repo_id': repo.repo_id, } all_data.append(pr_commit_row) + + if len(all_data) >= BATCH_SIZE: + logger.info(f"{task_name}: Inserting {len(all_data)} rows") + augur_db.insert_data(all_data,PullRequestCommit,pr_commits_natural_keys) + all_data.clear() except UrlNotFoundException: logger.info(f"{task_name}: PR with url of {pr_info['pr_url']} returned 404 on commit data. 
Skipping.") continue - + if len(all_data) > 0: logger.info(f"{task_name}: Inserting {len(all_data)} rows") - pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] augur_db.insert_data(all_data,PullRequestCommit,pr_commits_natural_keys) diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index cbecb44d6d..60222a3bc1 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -40,12 +40,14 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) + BATCH_SIZE = 1000 + pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] pr_file_rows = [] logger.info(f"Getting pull request files for repo: {repo.repo_git}") for index, pr_info in enumerate(pr_numbers): logger.info(f'Querying files for pull request #{index + 1} of {len(pr_numbers)}') - + query = """ query($repo: String!, $owner: String!,$pr_number: Int!, $numRecords: Int!, $cursor: String) { repository(name: $repo, owner: $owner) { @@ -68,7 +70,7 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection } } """ - + values = ["repository", "pullRequest", "files"] params = { 'owner': owner, @@ -92,6 +94,11 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection } pr_file_rows.append(data) + + if len(pr_file_rows) >= BATCH_SIZE: + logger.info(f"{task_name}: Inserting {len(pr_file_rows)} rows") + augur_db.insert_data(pr_file_rows, PullRequestFile, pr_file_natural_keys) + pr_file_rows.clear() except NotFoundException as e: logger.info(f"{task_name}: PR with number of {pr_info['pr_src_number']} returned 404 on file data. 
Skipping.") continue @@ -101,6 +108,5 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection if len(pr_file_rows) > 0: - # Execute a bulk upsert with sqlalchemy - pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] + logger.info(f"{task_name}: Inserting {len(pr_file_rows)} rows") augur_db.insert_data(pr_file_rows, PullRequestFile, pr_file_natural_keys) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 40f56e0ee5..1fbfec060a 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -381,6 +381,7 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: logger.debug(f"{owner}/{repo} No pr reviews for repo") return + # Process contributors (all_pr_reviews already in memory, so no OOM risk) contributors = [] for pull_request_id, reviews in all_pr_reviews.items(): @@ -389,33 +390,21 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: if contributor: contributors.append(contributor) - logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") - augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) + logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") + augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) + # Process pr reviews (all_pr_reviews already in memory, so no OOM risk) pr_reviews = [] for pull_request_id, reviews in all_pr_reviews.items(): for review in reviews: - + if "cntrb_id" in review: pr_reviews.append(extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_source, tool_version)) - logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") - pr_review_natural_keys = ["pr_review_src_id",] - pr_review_string_fields = ["pr_review_body",] - augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys, string_fields=pr_review_string_fields) - - - - - - - - - - - - - + logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") + pr_review_natural_keys = ["pr_review_src_id",] + pr_review_string_fields = ["pr_review_body",] + augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys, string_fields=pr_review_string_fields) From ab2fd7b72bde2e7ccd3ec65ac71c65b2e7c30e46 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 20 Nov 2025 11:42:45 -0600 Subject: [PATCH 057/104] Update augur/tasks/github/issues.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Sean P. Goggins --- augur/tasks/github/issues.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index d100d511bc..68cae4d30c 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -84,7 +84,7 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: -def retrieve_all_issue_data(repo_git: str, logger:logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): +def retrieve_all_issue_data(repo_git: str, logger: logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): """ Retrieve all issue data for a repository as a generator. From 0d068dc8c68016f12f75cfdfbf25409162cf4bbf Mon Sep 17 00:00:00 2001 From: "Sean P. 
Goggins" Date: Thu, 20 Nov 2025 11:43:41 -0600 Subject: [PATCH 058/104] Update augur/tasks/github/issues.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Sean P. Goggins --- augur/tasks/github/issues.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index 68cae4d30c..aaca35ed5f 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -62,7 +62,7 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: batch.append(issue) if len(batch) >= batch_size: - logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues})") + logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues + len(batch)})") process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) total_issues += len(batch) batch.clear() From b5eac7a48d9de6f89049ceab7c8474629f4ed3d4 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 17:05:21 -0500 Subject: [PATCH 059/104] fix: Optimize database cursor usage by fetching results immediately in insert_facade_contributors Signed-off-by: Shlok Gilda --- augur/tasks/github/facade_github/tasks.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 3396de7b64..73fd9a51b5 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -253,6 +253,10 @@ def insert_facade_contributors(self, repo_git): #Execute statement with session. result = execute_sql(new_contrib_sql) + # Fetch all results immediately to close the database cursor/connection + # This prevents holding the connection open during GitHub API calls + rows = result.mappings().fetchall() + #print(new_contribs) #json.loads(pd.read_sql(new_contrib_sql, self.db, params={ @@ -265,7 +269,7 @@ def insert_facade_contributors(self, repo_git): batch = [] BATCH_SIZE = 1000 - for row in result.mappings(): + for row in rows: batch.append(dict(row)) if len(batch) >= BATCH_SIZE: @@ -313,11 +317,15 @@ def insert_facade_contributors(self, repo_git): result = execute_sql(resolve_email_to_cntrb_id_sql) + # Fetch all results immediately to close the database cursor/connection + # This prevents holding the connection open during database UPDATE operations + rows = result.mappings().fetchall() + # Process results in batches to reduce memory usage batch = [] BATCH_SIZE = 1000 - for row in result.mappings(): + for row in rows: batch.append(dict(row)) if len(batch) >= BATCH_SIZE: From f9052cbfe0b9d711002cbf02a6d12327cf1d6eb6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 1 Dec 2025 15:29:49 -0500 Subject: [PATCH 060/104] Pylint and other style fixes Signed-off-by: Adrian Edwards --- augur/tasks/git/facade_tasks.py | 2 +- augur/tasks/github/facade_github/tasks.py | 1 - augur/tasks/github/issues.py | 2 +- augur/tasks/github/pull_requests/tasks.py | 34 ++++------------------- 4 files changed, 7 insertions(+), 32 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index 08594a231a..0726b6df11 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -255,7 +255,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: facade_helper.log_activity('Debug',f"Commits missing from repo {repo_id}: {len(missing_commits)}") - if not len(missing_commits) or repo_id is 
None: + if not missing_commits or repo_id is None: #session.log_activity('Info','Type of missing_commits: %s' % type(missing_commits)) return diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 73fd9a51b5..53a3d6648a 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -5,7 +5,6 @@ from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from augur.application.db.models import Contributor from augur.tasks.github.facade_github.core import * from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors from augur.application.db.lib import get_session, execute_session_query diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index aaca35ed5f..91e56deaf7 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -12,7 +12,7 @@ from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from augur.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo from augur.tasks.util.worker_util import remove_duplicate_dicts -from augur.application.db.models import Issue, IssueLabel, IssueAssignee, Contributor +from augur.application.db.models import Issue, IssueLabel, IssueAssignee from augur.application.config import get_development_flag from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_core_data_last_collected, batch_insert_contributors diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 1fbfec060a..d4d0b3114d 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -11,12 +11,12 @@ from augur.application.db.models import PullRequest, Message, PullRequestReview, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, PullRequestReviewMessageRef, Contributor, Repo from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from augur.application.db.lib import get_session, get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors +from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors from augur.application.db.util import execute_session_query from ..messages import process_github_comment_contributors from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_core_data_last_collected -from typing import Generator, List, Dict +from typing import List platform_id = 1 @@ -52,15 +52,15 @@ def collect_pull_requests(repo_git: str, full_collection: bool) -> int: total_count += len(all_data) all_data.clear() - if len(all_data): + if all_data: process_pull_requests(all_data, f"{owner}/{repo}: Github Pr task", repo_id, logger, augur_db) total_count += len(all_data) if total_count > 0: - return total_count - else: logger.debug(f"{owner}/{repo} has no pull requests") return 0 + + return total_count @@ -182,30 +182,6 @@ def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): 
pr_metadata_natural_keys, string_fields=pr_metadata_string_fields) - - - - - - - - - - - - - - - - - - - - - - - - def process_pull_request_review_contributor(pr_review: dict, tool_source: str, tool_version: str, data_source: str): # get contributor data and set pr cntrb_id From 29edd21a196a91231f0f59c5d79cbd6eb65e13b4 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 16:31:14 -0400 Subject: [PATCH 061/104] swap code to using tomli Signed-off-by: Adrian Edwards --- .../libyear_util/pypi_parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py index fa4a1d7b63..1bf56f202a 100644 --- a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py +++ b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py @@ -1,6 +1,6 @@ import re, os import json -import toml +import tomli import logging import yaml @@ -93,15 +93,15 @@ def map_dependencies_pipfile(packages, type): #def parse_pipfile(file_handle): -# manifest = toml.load(file_handle) +# manifest = tomli.load(file_handle) # return map_dependencies_pipfile(manifest['packages'],'runtime') + #map_dependencies_pipfile(manifest['dev-packages'], 'develop') ## Erro handling Means that the parse_pipfile(...) old function is assuming the presence of a dev-packages key in the parsed Pipfile, but that key does not exist in some cases. def parse_pipfile(file_handle): - import toml + import tomli try: - manifest = toml.load(file_handle) + manifest = tomli.load(file_handle) except Exception as e: logging.warning(f"Failed to parse Pipfile: {getattr(file_handle, 'name', 'unknown')}, error: {e}") return [] @@ -154,8 +154,8 @@ def parse_setup_py(file_handle): def parse_poetry(file_handle, repo_id=None, path=None): file_name = getattr(file_handle, 'name', 'unknown') try: - manifest = toml.load(file_handle) - except toml.TomlDecodeError as e: + manifest = tomli.load(file_handle) + except tomli.TomlDecodeError as e: logging.warning(f"[Repo ID: {repo_id}] Skipping malformed TOML file: {file_name} at {path}, error: {e}") return [] except Exception as e: @@ -172,7 +172,7 @@ def parse_poetry(file_handle, repo_id=None, path=None): def parse_poetry_lock(file_handle): - manifest = toml.load(file_handle) + manifest = tomli.load(file_handle) deps = list() group = 'runtime' for package in manifest['package']: From 53953694060e81c35d1eeb01fd17103e04a51072 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 16:31:59 -0400 Subject: [PATCH 062/104] swap toml package in dependencies Signed-off-by: Adrian Edwards --- pyproject.toml | 2 +- uv.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 908558f239..8f99591e75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,7 @@ dependencies = [ "tensorflow==2.15.0", "textblob==0.15.3", "textual>=0.73.0", - "toml", + "tomli>=2.2.1", "toolz>=0.8.2", "tornado==6.4.1", "typing-extensions>=4.7", diff --git a/uv.lock b/uv.lock index 819bc2be00..1631362afb 100644 --- a/uv.lock +++ b/uv.lock @@ -207,7 +207,7 @@ dependencies = [ { name = "tensorflow" }, { name = "textblob" }, { name = "textual" }, - { name = "toml" }, + { name = "tomli" }, { name = "toolz" }, { name = "tornado" }, { name = "typing-extensions" }, @@ -332,7 +332,7 @@ requires-dist = [ { name = "tensorflow", specifier = "==2.15.0" }, { name = "textblob", specifier = "==0.15.3" }, { 
name = "textual", specifier = ">=0.73.0" }, - { name = "toml" }, + { name = "tomli", specifier = ">=2.2.1" }, { name = "toolz", specifier = ">=0.8.2" }, { name = "tornado", specifier = "==6.4.1" }, { name = "typing-extensions", specifier = ">=4.7" }, From 9553151e18d547a51a3bfeffa3424dd62b9aee61 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 16:33:45 -0400 Subject: [PATCH 063/104] use built in tomllib instead Signed-off-by: Adrian Edwards --- .../libyear_util/pypi_parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py index 1bf56f202a..7b1d2b1f4a 100644 --- a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py +++ b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py @@ -1,6 +1,6 @@ import re, os import json -import tomli +import tomllib import logging import yaml @@ -93,15 +93,15 @@ def map_dependencies_pipfile(packages, type): #def parse_pipfile(file_handle): -# manifest = tomli.load(file_handle) +# manifest = tomllib.load(file_handle) # return map_dependencies_pipfile(manifest['packages'],'runtime') + #map_dependencies_pipfile(manifest['dev-packages'], 'develop') ## Erro handling Means that the parse_pipfile(...) old function is assuming the presence of a dev-packages key in the parsed Pipfile, but that key does not exist in some cases. def parse_pipfile(file_handle): - import tomli + import tomllib try: - manifest = tomli.load(file_handle) + manifest = tomllib.load(file_handle) except Exception as e: logging.warning(f"Failed to parse Pipfile: {getattr(file_handle, 'name', 'unknown')}, error: {e}") return [] @@ -154,8 +154,8 @@ def parse_setup_py(file_handle): def parse_poetry(file_handle, repo_id=None, path=None): file_name = getattr(file_handle, 'name', 'unknown') try: - manifest = tomli.load(file_handle) - except tomli.TomlDecodeError as e: + manifest = tomllib.load(file_handle) + except tomllib.TomlDecodeError as e: logging.warning(f"[Repo ID: {repo_id}] Skipping malformed TOML file: {file_name} at {path}, error: {e}") return [] except Exception as e: @@ -172,7 +172,7 @@ def parse_poetry(file_handle, repo_id=None, path=None): def parse_poetry_lock(file_handle): - manifest = tomli.load(file_handle) + manifest = tomllib.load(file_handle) deps = list() group = 'runtime' for package in manifest['package']: From 0f27edea91ce72fb1626abe8ccf4c983148b1ef5 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 16:36:32 -0400 Subject: [PATCH 064/104] tomli only needed on older python versions since its part of the standard lib since 3.11 Signed-off-by: Adrian Edwards --- .../dependency_libyear_tasks/libyear_util/pypi_parser.py | 7 ++++++- pyproject.toml | 2 +- uv.lock | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py index 7b1d2b1f4a..5a99194ccf 100644 --- a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py +++ b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py @@ -1,6 +1,11 @@ import re, os import json -import tomllib +import sys +if sys.version_info >= (3, 11): + import tomllib +else: + import tomli as tomllib + import logging import yaml diff --git a/pyproject.toml b/pyproject.toml index 8f99591e75..8193867b81 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -84,7 +84,7 @@ dependencies = [ "tensorflow==2.15.0", "textblob==0.15.3", "textual>=0.73.0", - "tomli>=2.2.1", + "tomli>=2.2.1 ; python_full_version < '3.11'", "toolz>=0.8.2", "tornado==6.4.1", "typing-extensions>=4.7", diff --git a/uv.lock b/uv.lock index 1631362afb..daa844fd49 100644 --- a/uv.lock +++ b/uv.lock @@ -207,7 +207,7 @@ dependencies = [ { name = "tensorflow" }, { name = "textblob" }, { name = "textual" }, - { name = "tomli" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "toolz" }, { name = "tornado" }, { name = "typing-extensions" }, @@ -332,7 +332,7 @@ requires-dist = [ { name = "tensorflow", specifier = "==2.15.0" }, { name = "textblob", specifier = "==0.15.3" }, { name = "textual", specifier = ">=0.73.0" }, - { name = "tomli", specifier = ">=2.2.1" }, + { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.2.1" }, { name = "toolz", specifier = ">=0.8.2" }, { name = "tornado", specifier = "==6.4.1" }, { name = "typing-extensions", specifier = ">=4.7" }, From 1cab8cea7ddd07e311f3626ee6c06ee1fc489fbe Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 19 Nov 2025 16:14:53 -0500 Subject: [PATCH 065/104] remove import from within function Signed-off-by: Adrian Edwards --- .../git/dependency_libyear_tasks/libyear_util/pypi_parser.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py index 5a99194ccf..11b880e04c 100644 --- a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py +++ b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py @@ -103,8 +103,6 @@ def map_dependencies_pipfile(packages, type): ## Erro handling Means that the parse_pipfile(...) old function is assuming the presence of a dev-packages key in the parsed Pipfile, but that key does not exist in some cases. 
def parse_pipfile(file_handle): - import tomllib - try: manifest = tomllib.load(file_handle) except Exception as e: From 59bd91826164333d38a419a95547d208669f2066 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Mon, 15 Dec 2025 01:09:50 -0500 Subject: [PATCH 066/104] Fix null target check in get_release_inf function Signed-off-by: Shlok Gilda --- augur/tasks/github/releases/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/releases/core.py b/augur/tasks/github/releases/core.py index 255b34cf89..f6b2f5e56b 100644 --- a/augur/tasks/github/releases/core.py +++ b/augur/tasks/github/releases/core.py @@ -38,7 +38,7 @@ def get_release_inf(repo_id, release, tag_only): 'tag_only': tag_only } else: - if 'tagger' in release['target']: + if release['target'] and 'tagger' in release['target']: tagger = release["target"]["tagger"] From 23df46e1deff9085da6ec051595464406e74e750 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 12:46:09 -0500 Subject: [PATCH 067/104] pass through follow_redirects parameter in hit_api so clients can change it Discovered by gpt5 via claude Signed-off-by: Adrian Edwards --- augur/tasks/github/util/github_paginator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/util/github_paginator.py b/augur/tasks/github/util/github_paginator.py index bd141d0c32..990bc4f738 100644 --- a/augur/tasks/github/util/github_paginator.py +++ b/augur/tasks/github/util/github_paginator.py @@ -9,7 +9,7 @@ from enum import Enum -def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, method: str = 'GET', ) -> Optional[httpx.Response]: +def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, method: str = 'GET', follow_redirects=True) -> Optional[httpx.Response]: """Ping the api and get the data back for the page. Returns: @@ -21,7 +21,7 @@ def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, try: response = client.request( - method=method, url=url, auth=key_manager, timeout=timeout, follow_redirects=True) + method=method, url=url, auth=key_manager, timeout=timeout, follow_redirects=follow_redirects) except TimeoutError: logger.info(f"Request timed out. 
Sleeping {round(timeout)} seconds and trying again...\n") From a393e5b8782300f7ab5a90605e5e0e8cc84f6f4b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 12:46:43 -0500 Subject: [PATCH 068/104] dont follow redirects when checking github move Discovered by gpt5 via claude Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index b302a70a06..251abd3362 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -51,7 +51,7 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c attempts = 0 while attempts < 10: - response_from_gh = hit_api(key_auth, url, logger) + response_from_gh = hit_api(key_auth, url, logger, follow_redirects=False) if response_from_gh and response_from_gh.status_code != 404: break From 7d1f6f0c56724142ab667cedf144084bf2643318 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 13:18:35 -0500 Subject: [PATCH 069/104] avoid dangerous modification of sqalchemy internal representations when updating the DB Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 251abd3362..0542cb107e 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -21,9 +21,11 @@ def update_repo_with_dict(repo,new_dict,logger): logger: logging object db: db object """ - - to_insert = repo.__dict__ - del to_insert['_sa_instance_state'] + to_insert = {} + to_insert['repo_git'] = repo.repo_git + to_insert['repo_path'] = repo.repo_path + to_insert['repo_name'] = repo.repo_name + to_insert['description'] = repo.description to_insert.update(new_dict) result = bulk_insert_dicts(logger, to_insert, Repo, ['repo_id']) From 30a3b69e38c304010f007254c76dd07afe1d63da Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 13:19:38 -0500 Subject: [PATCH 070/104] perform timeout check before trying to access the response object Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 0542cb107e..69ff84d64b 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -60,6 +60,10 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c attempts += 1 + if attempts >= 10: + logger.error(f"Could not check if repo moved because the api timed out 10 times. Url: {url}") + raise Exception(f"ERROR: Could not get api response for repo: {url}") + #Update Url and retry if 301 #301 moved permanently if response_from_gh.status_code == 301: @@ -119,10 +123,6 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c session.commit() raise Exception("ERROR: Repo has moved, and there is no redirection! 404 returned, not 301. Resetting Collection!") - - if attempts >= 10: - logger.error(f"Could not check if repo moved because the api timed out 10 times. 
Url: {url}") - raise Exception(f"ERROR: Could not get api response for repo: {url}") #skip if not 404 logger.info(f"Repo found at url: {url}") From c2ef9651edb27141c46c7deef7cefdb914b89d6b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 13:19:57 -0500 Subject: [PATCH 071/104] replace wildcard import with importing the relevant objects Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 69ff84d64b..0abf14dc0a 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -1,5 +1,5 @@ from augur.tasks.github.util.github_task_session import * -from augur.application.db.models import * +from augur.application.db.models import Repo, CollectionStatus from augur.tasks.github.util.github_paginator import hit_api from augur.tasks.github.util.util import get_owner_repo from augur.tasks.github.util.util import parse_json_response From f13cddfe4e7b1a203c835154acef61945531d0fe Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 13:21:43 -0500 Subject: [PATCH 072/104] handle extreme edge case of a 301 redirect with no location field by throwing an exception Assisted-by: GPT5 via cursor Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 0abf14dc0a..8564459ee3 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -67,8 +67,12 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c #Update Url and retry if 301 #301 moved permanently if response_from_gh.status_code == 301: + redirect_location = response_from_gh.headers.get('location') or response_from_gh.headers.get('Location') + if not redirect_location: + logger.error(f"Could not check if repo moved because the redirect location is not present. Url: {url}") + raise Exception(f"ERROR: Could not get redirect location for repo: {url}") - owner, name = extract_owner_and_repo_from_endpoint(key_auth, response_from_gh.headers['location'], logger) + owner, name = extract_owner_and_repo_from_endpoint(key_auth, redirect_location, logger) try: old_description = str(repo.description) From cd9b090109b886ece4d503195b771ac33cca8eca Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 13:22:16 -0500 Subject: [PATCH 073/104] stop retrying the request if any response codes from github are received. 
Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 8564459ee3..3fb4ad3591 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -55,7 +55,7 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c while attempts < 10: response_from_gh = hit_api(key_auth, url, logger, follow_redirects=False) - if response_from_gh and response_from_gh.status_code != 404: + if response_from_gh: break attempts += 1 From 4d909d339575356098b7cf97329258b31f2d5adf Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 14:49:43 -0500 Subject: [PATCH 074/104] add missing repo_id value Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 3fb4ad3591..4938c9fa75 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -22,6 +22,7 @@ def update_repo_with_dict(repo,new_dict,logger): db: db object """ to_insert = {} + to_insert['repo_id'] = repo.repo_id # this is here because its needed as a unique key for bulk_insert_dicts to_insert['repo_git'] = repo.repo_git to_insert['repo_path'] = repo.repo_path to_insert['repo_name'] = repo.repo_name From 62926f01aee8acf8c08f3db611aa25e76d8ac03e Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 17 Nov 2025 11:59:34 -0500 Subject: [PATCH 075/104] ok turns out the limited dict stuff broke and is causing nulls in the db Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 4938c9fa75..8e57762400 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -21,12 +21,8 @@ def update_repo_with_dict(repo,new_dict,logger): logger: logging object db: db object """ - to_insert = {} - to_insert['repo_id'] = repo.repo_id # this is here because its needed as a unique key for bulk_insert_dicts - to_insert['repo_git'] = repo.repo_git - to_insert['repo_path'] = repo.repo_path - to_insert['repo_name'] = repo.repo_name - to_insert['description'] = repo.description + to_insert = dict(repo.__dict__) + del to_insert['_sa_instance_state'] to_insert.update(new_dict) result = bulk_insert_dicts(logger, to_insert, Repo, ['repo_id']) From 14eb94337fe3c4c7659510506083969c80ab8ca0 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 19 Nov 2025 14:14:31 -0500 Subject: [PATCH 076/104] use custom exception types to bubble the exceptions up a level and catch them to re-emit celery exceptions. 
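
Reduced to a self-contained sketch, the pattern is: raise domain-specific exceptions from the collection code, then translate them into Celery control-flow exceptions at the task boundary. The exception names below match the ones added to core.py in this patch; check_repo is only a hypothetical stand-in for ping_github_for_repo_move:

    from celery.exceptions import Retry, Reject

    class RepoMovedException(Exception):
        """Collection-level signal: the repository answered with a redirect."""

    class RepoGoneException(Exception):
        """Collection-level signal: the repository is gone (404 with no redirect)."""

    def check_repo(url):
        # stand-in for ping_github_for_repo_move; it only raises the domain exceptions
        raise RepoMovedException(f"{url} has moved")

    def detect_move_task(url):
        # task boundary: re-emit the domain exceptions as Celery exceptions,
        # mirroring the try/except added to tasks.py below
        try:
            check_repo(url)
        except RepoMovedException as e:
            raise Retry(e)
        except RepoGoneException as e:
            raise Reject(e)
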
Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 10 ++++++++-- augur/tasks/github/detect_move/tasks.py | 11 +++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 8e57762400..6b47df1a32 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -9,6 +9,12 @@ from augur.application.db.lib import bulk_insert_dicts +class RepoMovedException(Exception): + pass + +class RepoGoneException(Exception): + pass + def update_repo_with_dict(repo,new_dict,logger): """ @@ -86,7 +92,7 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c update_repo_with_dict(repo, repo_update_dict, logger) - raise Exception("ERROR: Repo has moved! Resetting Collection!") + raise RepoMovedException("ERROR: Repo has moved! Resetting Collection!") #Mark as ignore if 404 if response_from_gh.status_code == 404: @@ -122,7 +128,7 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c session.commit() - raise Exception("ERROR: Repo has moved, and there is no redirection! 404 returned, not 301. Resetting Collection!") + raise RepoGoneException("ERROR: Repo has moved, and there is no redirection! 404 returned, not 301. Resetting Collection!") #skip if not 404 diff --git a/augur/tasks/github/detect_move/tasks.py b/augur/tasks/github/detect_move/tasks.py index f542d89289..6f7b04b8de 100644 --- a/augur/tasks/github/detect_move/tasks.py +++ b/augur/tasks/github/detect_move/tasks.py @@ -1,11 +1,13 @@ import logging -from augur.tasks.github.detect_move.core import * +from augur.tasks.github.detect_move.core import ping_github_for_repo_move, RepoMovedException, RepoGoneException from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask, AugurSecondaryRepoCollectionTask from augur.application.db.lib import get_repo_by_repo_git, get_session from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth +from celery.exceptions import Retry, Reject + @celery.task(base=AugurCoreRepoCollectionTask) def detect_github_repo_move_core(repo_git : str) -> None: @@ -24,7 +26,12 @@ def detect_github_repo_move_core(repo_git : str) -> None: #Ping each repo with the given repo_git to make sure #that they are still in place. 
- ping_github_for_repo_move(session, key_auth, repo, logger) + try: + ping_github_for_repo_move(session, key_auth, repo, logger) + except RepoMovedException as e: + raise Retry(e) + except RepoGoneException as e: + raise Reject(e) @celery.task(base=AugurSecondaryRepoCollectionTask) From 629da3a8ae2fae8d798731621049b51e1520f5a6 Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Fri, 19 Dec 2025 00:39:45 +0530 Subject: [PATCH 077/104] Fix: Updated link of redis windows installation Signed-off-by: PredictiveManish --- docs/source/getting-started/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index 41bc1be4dc..7f270e3f0f 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -48,7 +48,7 @@ Caching System (Redis) ---------------------- * `Linux Installation `__ * `Mac Installation `__ -* `Windows Installation `__ +* `Windows Installation `__ Message Broker (RabbitMQ) ------------------------- From ffd2505980707811e6a5fa73c8de9876fa650b0b Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Fri, 19 Dec 2025 00:43:10 +0530 Subject: [PATCH 078/104] fix: Updated links for Redis Installation Signed-off-by: PredictiveManish --- docs/source/getting-started/installation.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index 7f270e3f0f..e7dce6ffbe 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -46,8 +46,8 @@ The ``message_insights_worker`` uses a system-level package called OpenMP. You w Caching System (Redis) ---------------------- -* `Linux Installation `__ -* `Mac Installation `__ +* `Linux Installation `__ +* `Mac Installation `__ * `Windows Installation `__ Message Broker (RabbitMQ) From d410b3b9ccab25362fd3f608828651052fd6476d Mon Sep 17 00:00:00 2001 From: Kushagra Date: Fri, 12 Sep 2025 15:37:02 +0530 Subject: [PATCH 079/104] Fix broken 8knot link in README Signed-off-by: Kushagra Signed-off-by: Adrian Edwards --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 16bd88f2e5..883b28d982 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Augur is now releasing a dramatically improved new version. It is also available - A new job management architecture that uses Celery and Redis to manage queues, and enables users to run a Flower job monitoring dashboard. - Materialized views to increase the snappiness of API’s and Frontends on large scale data. - Changes to primary keys, which now employ a UUID strategy that ensures unique keys across all Augur instances. - - Support for [8knot](https://github.com/oss-aspen/8kno) dashboards (view a sample [here](https://eightknot.osci.io/)). + - Support for [8knot](https://github.com/oss-aspen/8knot) dashboards (view a sample [here](https://eightknot.osci.io/)). *beautification coming soon!* - Data collection completeness assurance enabled by a structured, relational data set that is easily compared with platform API Endpoints. - The next release of the new version will include a hosted version of Augur where anyone can create an account and add repos *they care about*. 
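
Taken together, the detect_move changes in PATCH 068 through PATCH 073 above reshape how the repo-move check talks to the GitHub API: redirects are no longer followed so a 301 stays visible, the Location header is read defensively, a 404 marks the repo as gone, and only a completely missing response triggers a retry. A condensed illustration of that request handling, written against the requests library purely for this sketch (Augur itself goes through its hit_api helper, so the HTTP details here are an assumption of the example, not the project's code):

    import requests

    def probe_repo(api_url):
        """Return ("moved", new_location), ("gone", None), or ("ok", None) for a repo API URL."""
        response = requests.get(api_url, allow_redirects=False)  # keep a 301 visible instead of following it
        if response.status_code == 301:
            # requests exposes headers case-insensitively, but a missing Location is still possible
            location = response.headers.get("location")
            if not location:
                raise RuntimeError(f"301 from {api_url} without a Location header")
            return "moved", location
        if response.status_code == 404:
            return "gone", None
        return "ok", None

    # usage (requires network access):
    # status, new_url = probe_repo("https://api.github.com/repos/chaoss/augur")
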
From 49af484d288ed33b95a483b7436de1e6a4bae0c6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 19 Dec 2025 14:07:46 -0500 Subject: [PATCH 080/104] update Code of Conduct link Signed-off-by: Adrian Edwards --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 883b28d982..e59180de0c 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ If you get stuck, please feel free to [ask for help](https://github.com/chaoss/a ## Contributing -To contribute to Augur, please follow the guidelines found in our [CONTRIBUTING.md](CONTRIBUTING.md) and our [Code of Conduct](CODE_OF_CONDUCT.md). Augur is a welcoming community that is open to all, regardless if you're working on your 1000th contribution to open source or your 1st. +To contribute to Augur, please follow the guidelines found in our [CONTRIBUTING.md](CONTRIBUTING.md) and the CHAOSS [Code of Conduct]([CODE_OF_CONDUCT.md](https://github.com/chaoss/.github/blob/main/CODE_OF_CONDUCT.md)). Augur is a welcoming community that is open to all, regardless if you're working on your 1000th contribution to open source or your 1st. We strongly believe that much of what makes open source so great is the incredible communities it brings together, so we invite you to join us! ## License, Copyright, and Funding From 3b6f57572721cdb78f546d3da9b8a8eedd7996e7 Mon Sep 17 00:00:00 2001 From: Pratyksh Gupta Date: Sat, 27 Dec 2025 23:16:20 +0530 Subject: [PATCH 081/104] Fix #3474: Add default value for AUGUR_DOCKER_DEPLOY to prevent AttributeError on bare metal installs Signed-off-by: Pratyksh Gupta --- augur/api/gunicorn_conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/api/gunicorn_conf.py b/augur/api/gunicorn_conf.py index dd1bfc6961..6586b6f69a 100644 --- a/augur/api/gunicorn_conf.py +++ b/augur/api/gunicorn_conf.py @@ -40,7 +40,7 @@ # set the log location for gunicorn logs_directory = get_value('Logging', 'logs_directory') -is_docker = os.getenv("AUGUR_DOCKER_DEPLOY").lower() in ('true', '1', 't', 'y', 'yes') +is_docker = os.getenv("AUGUR_DOCKER_DEPLOY", 'False').lower() in ('true', '1', 't', 'y', 'yes') accesslog = f"{logs_directory}/gunicorn.log" errorlog = f"{logs_directory}/gunicorn.log" From 0156030f2bb76c4d8b76cc21a295cb749f519b90 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:01:48 -0500 Subject: [PATCH 082/104] allow config sources in the config class to be overridden via a parameter and skip the db stuff Signed-off-by: Adrian Edwards --- augur/application/config.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index a3fe00a2e0..5a42231484 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -143,24 +143,28 @@ def base_config(self): return config - def __init__(self, logger, session: DatabaseSession): + def __init__(self, logger, session: DatabaseSession, config_sources: list = None): self.session = session self.logger = logger self.accepted_types = ["str", "bool", "int", "float", "NoneType"] - # list items in order of precedence. lowest precedence (i.e. fallback) values first - self.config_sources = [ - JsonConfig(default_config, logger) - ] + if not config_sources: + # list items in order of precedence. lowest precedence (i.e. 
fallback) values first + config_sources = [ + JsonConfig(default_config, logger) + ] - config_dir = Path(os.getenv("CONFIG_DATADIR", "./")) - config_path = config_dir.joinpath("augur.json") - if config_path.exists(): - self.config_sources.append(JsonConfig(json.loads(config_path.read_text(encoding="UTF-8")), logger)) + config_dir = Path(os.getenv("CONFIG_DATADIR", "./")) + config_path = config_dir.joinpath("augur.json") + if config_path.exists(): + config_sources.append(JsonConfig(json.loads(config_path.read_text(encoding="UTF-8")), logger)) + + config_sources.append( DatabaseConfig(session, logger) ) + + self.config_sources = config_sources - self.config_sources.append( DatabaseConfig(session, logger) ) def _get_writable_source(self) -> 'ConfigStore': """Returns the highest precedence source that can be written to. From bcbe9e8472d1d2d720fec25a544732ac710687bb Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:05:48 -0500 Subject: [PATCH 083/104] Write unit test demonstrating the problem Signed-off-by: Adrian Edwards --- tests/test_classes/test_config_stores.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index 69fe19017f..30fd09f541 100644 --- a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -109,3 +109,23 @@ def test_fetching_real_defaults(mock_logger, mock_session): cfg.config_sources = [JsonConfig(default_config, mock_logger)] assert cfg.get_value("Redis", "cache_group") == 0 + + +def test_get_section_incorporates_hierarchy(): + + default_dict = { + "Section1": {"alpha": 1, "beta": "x"}, + "Section2": {"gamma": False, "delta": 3.14}, + } + + override_dict = { + "Section1": {"beta": "y"}, + "Section2": {"gamma": False, "delta": 3.14}, + } + + cfg = AugurConfig(None, None, [JsonConfig(default_dict, mock_logger), JsonConfig(override_dict, mock_logger)]) + + expected_dict = {"alpha": 1, "beta": "y"} + + assert cfg.get_section("Section1") == expected_dict + From 6dbb3bf62bef35a6eb3dfef49b0d77ef8b9de396 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:39:42 -0500 Subject: [PATCH 084/104] modify the test to utilize load_config, since thats whats relied on for the merging functionality Signed-off-by: Adrian Edwards --- tests/test_classes/test_config_stores.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index 30fd09f541..b55275cfb7 100644 --- a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -111,6 +111,30 @@ def test_fetching_real_defaults(mock_logger, mock_session): assert cfg.get_value("Redis", "cache_group") == 0 +def test_load_config_utilizes_hierarchy(): + + default_dict = { + "Section1": {"alpha": 1, "beta": "x"}, + "Section2": {"gamma": False, "delta": 3.14}, + } + + override_dict = { + "Section1": {"beta": "y"}, + "Section2": {"Epsilon": True, "delta": 6.28}, + "Section3": {"hi": "there"} + } + + cfg = AugurConfig(None, None, [JsonConfig(default_dict, mock_logger), JsonConfig(override_dict, mock_logger)]) + + expected_dict = { + "Section1": {"alpha": 1, "beta": "y"}, + "Section2": {"gamma": False, "Epsilon": True, "delta": 6.28}, + "Section3": {"hi": "there"} # test that new sections are accounted for too + } + + assert cfg.load_config() == expected_dict + + def test_get_section_incorporates_hierarchy(): default_dict = { From 
a7b2cd9c836e5dd6d7a183fe0e86d74e2c6a33e6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:41:54 -0500 Subject: [PATCH 085/104] Fix the issue Signed-off-by: Adrian Edwards --- augur/application/config.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index 5a42231484..ded2f419cc 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -234,10 +234,36 @@ def load_config(self) -> dict: Returns: The config from all sources """ + + def merge(a: dict, b: dict, path=[]): + """Do a deep merge of two python dictionaries (standard library update and merge dont do this) + This is what allows updated values in higher priority config sources to take precedence. + + This function is lightly modified from https://stackoverflow.com/a/7205107 + + Args: + a (dict): The dict to merge into. Will be mutated + b (dict): The incoming dict to merge in. Data in this dict will take precedence when there is a conflict + path (list, optional): Keeps track of the path during the recursion process. Not intended for use by consumers. Defaults to []. + + Returns: + dict: The dict passed in via parameter a, now modified with the new values + """ + for key in b: + if key in a: + if isinstance(a[key], dict) and isinstance(b[key], dict): + merge(a[key], b[key], path + [str(key)]) + elif a[key] != b[key]: + # raise Exception('Conflict at ' + '.'.join(path + [str(key)])) + a[key] = b[key] + else: + a[key] = b[key] + return a + config = {} for config_source in self.config_sources: - config.update(config_source.retrieve_dict()) + merge(config, config_source.retrieve_dict()) return config From 7dc00820adaa72c9ec70c223c91f2bad0c732228 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:42:10 -0500 Subject: [PATCH 086/104] improve how JsonSource's identify themselves in the logs Signed-off-by: Adrian Edwards --- augur/application/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index ded2f419cc..8c5b452eb5 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -624,6 +624,9 @@ def get_value(self, section_name: str, value_key: str): return self.json_data[section_name].get(value_key, None) + def __repr__(self): + return f"JsonSource({self.json_data})" + class DatabaseConfig(ConfigStore): From dfa43c851ad143b15b55a97e8c0231f0a346f53a Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:47:18 -0500 Subject: [PATCH 087/104] prevent accidental modification of JSON config values from externally Signed-off-by: Adrian Edwards --- augur/application/config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index 8c5b452eb5..1f91a3ef0a 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -1,6 +1,7 @@ import sqlalchemy as s from sqlalchemy import and_, update import json +import copy from typing import List, Any, Optional import os from augur.application.db.models import Config @@ -559,6 +560,11 @@ def load_dict(self, data: dict, ignore_existing=False): self.json_data.update(data) def retrieve_dict(self): + # if this dict isnt supposed to be mutable, we need to make a copy + # this prevents being able to change data in this object by reference + + if not self.writable: + return copy.deepcopy(self.json_data) return self.json_data def clear(self): From 
b5a126dbca93504ad010b06f67bd280d6af78e0d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:58:38 -0500 Subject: [PATCH 088/104] add docs for the init parameters Signed-off-by: Adrian Edwards --- augur/application/config.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index 1f91a3ef0a..d6447c7bdf 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -145,6 +145,14 @@ def base_config(self): return config def __init__(self, logger, session: DatabaseSession, config_sources: list = None): + """Create a new AugurConfig class + + Args: + logger (_type_): The logger instance to use for logging + session (DatabaseSession): a connection to the database for configuring the database source. + config_sources (list, optional): An alternative way to pass in config sources. Used for unit testing only. + Specifying a value here enables you to supply `None` to the `session` argument, since it will be unused. Defaults to None. + """ self.session = session self.logger = logger From 155186f5f0c373907a63703f64d705247e7c46ca Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Tue, 6 Jan 2026 15:52:42 -0800 Subject: [PATCH 089/104] remove path tracking Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- augur/application/config.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index d6447c7bdf..41a7290200 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -244,7 +244,7 @@ def load_config(self) -> dict: The config from all sources """ - def merge(a: dict, b: dict, path=[]): + def merge(a: dict, b: dict): """Do a deep merge of two python dictionaries (standard library update and merge dont do this) This is what allows updated values in higher priority config sources to take precedence. @@ -253,7 +253,6 @@ def merge(a: dict, b: dict, path=[]): Args: a (dict): The dict to merge into. Will be mutated b (dict): The incoming dict to merge in. Data in this dict will take precedence when there is a conflict - path (list, optional): Keeps track of the path during the recursion process. Not intended for use by consumers. Defaults to []. 
Returns: dict: The dict passed in via parameter a, now modified with the new values @@ -261,9 +260,8 @@ def merge(a: dict, b: dict, path=[]): for key in b: if key in a: if isinstance(a[key], dict) and isinstance(b[key], dict): - merge(a[key], b[key], path + [str(key)]) + merge(a[key], b[key]) elif a[key] != b[key]: - # raise Exception('Conflict at ' + '.'.join(path + [str(key)])) a[key] = b[key] else: a[key] = b[key] From 64ad1a5c0f54e94e4f5b5e84633cc777f9207680 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 7 Jan 2026 09:46:36 -0500 Subject: [PATCH 090/104] fix test for retrieving the correct dict Signed-off-by: Adrian Edwards --- tests/test_classes/test_config_stores.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index b55275cfb7..d6d1fb81a0 100644 --- a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -30,7 +30,7 @@ def test_jsonconfig_retrieve_has_get(mock_logger): cfg = JsonConfig(data, mock_logger) # retrieve full dict - assert cfg.retrieve_dict() is data + assert cfg.retrieve_dict() == data # has/get section assert cfg.has_section("Alpha") is True From b5920e1da2521e7fc0f7bcbeaae559936d937473 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 7 Jan 2026 09:51:33 -0500 Subject: [PATCH 091/104] add and fix test case for verifying write protection for the JSON config Signed-off-by: Adrian Edwards --- augur/application/config.py | 2 ++ tests/test_classes/test_config_stores.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index 41a7290200..f46f6dc278 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -546,6 +546,8 @@ class JsonConfig(ConfigStore): def __init__(self, json_data, logger: logging.Logger): super().__init__(logger) + if not self.writable: + json_data = copy.deepcopy(json_data) self.json_data = json_data @property diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index d6d1fb81a0..003f19431d 100644 --- a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -25,6 +25,25 @@ def test_jsonconfig_empty_true_false(mock_logger): assert JsonConfig({"A": {}}, mock_logger).empty is False +def test_jsonconfig_write_protection(mock_logger): + # JsonConfig should be not writeable by default, so we should be unable to change + # its values, even by abusing references + + data = {"Alpha": {"a": 1, "b": "str"}, "Beta": {}} + cfg = JsonConfig(data, mock_logger) + + # mutation via input + data["Alpha"]["a"] = 2 + + config_test = cfg.retrieve_dict() + assert config_test != data # the data in the config should not change + + # mutation via output + config_test["Alpha"]["a"] = 3 + + config_test = cfg.retrieve_dict() + assert config_test != data # the data in the config should not change + def test_jsonconfig_retrieve_has_get(mock_logger): data = {"Alpha": {"a": 1, "b": "str"}, "Beta": {}} cfg = JsonConfig(data, mock_logger) From 0fb462a388f588280f87d58644b3e86d1122bd88 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 19 Nov 2025 17:15:44 -0500 Subject: [PATCH 092/104] add CI job for running the unit tests with pytest using pytest avoids two layers of python environment "the gap between task runners like tox and test runners like pytest is narrower now" - Gemini Signed-off-by: Adrian Edwards --- .github/workflows/functional_test.yml | 30 
+++++++++++++++++++++++++++ pyproject.toml | 13 ++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 .github/workflows/functional_test.yml diff --git a/.github/workflows/functional_test.yml b/.github/workflows/functional_test.yml new file mode 100644 index 0000000000..06e3af0ef1 --- /dev/null +++ b/.github/workflows/functional_test.yml @@ -0,0 +1,30 @@ +name: "Functional tests" +# Runs automated test suites that ensure functionality is preserved. Any failures should prevent code from shipping. +on: + pull_request: + branches: [main, release] + +permissions: + contents: read + +jobs: + test: + name: test with ${{ matrix.env }} on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + env: ["3.10", "3.11", "3.12", "3.13", "3.14"] + os: [ubuntu-latest, macos-latest] + steps: + - uses: actions/checkout@v5 + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + enable-cache: true + + - name: Run Tests + run: | + uv run --python ${{ matrix.env }} pytest \ + tests/test_classes \ + --color=yes \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8193867b81..064b5e7bdd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -187,6 +187,19 @@ legacy_tox_ini = """ addopts = -ra -s """ +[tool.pytest.ini_options] +addopts = "-ra -s" +testpaths = [ + "tests/test_classes", + # "tests/test_routes", # runs, but needs a fixture for connecting to the web interface of Augur + # "tests/test_metrics", + # "tests/test_tasks", + # "tests/test_application", + # "tests/test_workers", + # "tests/test_workers/worker_persistence/", + # "tests/test_routes/runner.py" +] + [tool.mypy] files = ['augur/application/db/*.py'] ignore_missing_imports = true From 76b7ceb11304189802ae75587d7ebe43e01b98a6 Mon Sep 17 00:00:00 2001 From: pushpit kamboj Date: Fri, 9 Jan 2026 08:36:14 +0530 Subject: [PATCH 093/104] (fix): remove no else raise and no else return rules from .pylintrc Signed-off-by: pushpit kamboj --- .pylintrc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pylintrc b/.pylintrc index c319333602..f18952423d 100644 --- a/.pylintrc +++ b/.pylintrc @@ -142,7 +142,7 @@ confidence=HIGH, # Only enable specific messages disable=all -enable=unused-import,redefined-outer-name,E1206,E1205,E0704,E0107,E4702,E1101,E0211,E0213,E0103,E1133,E1120,E3102,E0602,E1123,E0001,W0702,W1404,W0706,W0101,W0120,W0718,R1737,R1705,R1720,R1724,R1723,R0401,R1701,C1802,C0200,C0501,C0201,W1001,E1102,R0923 +enable=unused-import,redefined-outer-name,E1206,E1205,E0704,E0107,E4702,E1101,E0211,E0213,E0103,E1133,E1120,E3102,E0602,E1123,E0001,W0702,W1404,W0706,W0101,W0120,W0718,R1737,R1724,R1723,R0401,R1701,C1802,C0200,C0501,C0201,W1001,E1102,R0923 [LOGGING] From 53eed1a9213af447bb6cd6b380a701fa963dd4b7 Mon Sep 17 00:00:00 2001 From: iGufrankhan Date: Sat, 10 Jan 2026 00:33:11 +0000 Subject: [PATCH 094/104] Remove stale explorer_libyear_detail refresh Signed-off-by: iGufrankhan --- scripts/control/refresh-matviews.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/control/refresh-matviews.sh b/scripts/control/refresh-matviews.sh index 576466b2df..1d1756031d 100644 --- a/scripts/control/refresh-matviews.sh +++ b/scripts/control/refresh-matviews.sh @@ -6,6 +6,5 @@ psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.augur_new_contributors with data;' psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW 
augur_data.explorer_contributor_actions with data;' psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_libyear_all with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_libyear_detail with data;' psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_new_contributors with data;' psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_entry_list with data;' \ No newline at end of file From 22f81b56a81cd7e4dc04938df7fcf977fe95fc9a Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Tue, 6 Jan 2026 17:13:14 -0800 Subject: [PATCH 095/104] Revert database url retrieval so bare metal works Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- augur/application/schema/alembic/env.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index bf2993c4b1..5b00c4a80d 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -3,6 +3,7 @@ from alembic import context from augur.application.db.models.base import Base +from augur.application.db.engine import get_database_string from sqlalchemy import create_engine from dotenv import load_dotenv import os @@ -33,7 +34,7 @@ # possibly swap sqlalchemy.url with AUGUR_DB env var too -sqlalchemy_url = os.getenv("AUGUR_DB") or config.get_main_option("sqlalchemy.url") +sqlalchemy_url = get_database_string() VERSIONS_DIR = Path(__file__).parent / "versions" From 41059062a76ba183a32ae84617395098255a35c6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Tue, 6 Jan 2026 17:21:35 -0800 Subject: [PATCH 096/104] unused os import Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- augur/application/schema/alembic/env.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 5b00c4a80d..3ae3afdb53 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -6,7 +6,6 @@ from augur.application.db.engine import get_database_string from sqlalchemy import create_engine from dotenv import load_dotenv -import os import re from pathlib import Path @@ -32,11 +31,8 @@ # my_important_option = config.get_main_option("my_important_option") # ... etc. 
-# possibly swap sqlalchemy.url with AUGUR_DB env var too - sqlalchemy_url = get_database_string() - VERSIONS_DIR = Path(__file__).parent / "versions" def _next_int_rev() -> str: From 43aaf92aac4d9a1688c847aab05b886e79e0abf3 Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Fri, 9 Jan 2026 22:37:42 -0500 Subject: [PATCH 097/104] Disable tests for 3.12+ so they work Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- .github/workflows/functional_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/functional_test.yml b/.github/workflows/functional_test.yml index 06e3af0ef1..544029df0e 100644 --- a/.github/workflows/functional_test.yml +++ b/.github/workflows/functional_test.yml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - env: ["3.10", "3.11", "3.12", "3.13", "3.14"] + env: ["3.10", "3.11"] #, "3.12", "3.13", "3.14" os: [ubuntu-latest, macos-latest] steps: - uses: actions/checkout@v5 @@ -27,4 +27,4 @@ jobs: run: | uv run --python ${{ matrix.env }} pytest \ tests/test_classes \ - --color=yes \ No newline at end of file + --color=yes From 4e2d0143043d2cd341f0a96baa20f0836dfbf68d Mon Sep 17 00:00:00 2001 From: Noaman-Akhtar Date: Sat, 10 Jan 2026 22:42:46 +0000 Subject: [PATCH 098/104] Deleted the augur-retired-sql.schema file Signed-off-by: Noaman-Akhtar --- docker/database/augur-retired-sql.schema | 10330 --------------------- 1 file changed, 10330 deletions(-) delete mode 100644 docker/database/augur-retired-sql.schema diff --git a/docker/database/augur-retired-sql.schema b/docker/database/augur-retired-sql.schema deleted file mode 100644 index ac3872a1d5..0000000000 --- a/docker/database/augur-retired-sql.schema +++ /dev/null @@ -1,10330 +0,0 @@ --- --- PostgreSQL database dump --- - --- Dumped from database version 12.11 (Ubuntu 12.11-0ubuntu0.20.04.1) --- Dumped by pg_dump version 12.11 (Ubuntu 12.11-0ubuntu0.20.04.1) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - --- --- Name: augur_data; Type: SCHEMA; Schema: -; Owner: augur --- - -CREATE SCHEMA augur_data; - - -ALTER SCHEMA augur_data OWNER TO augur; - --- --- Name: augur_operations; Type: SCHEMA; Schema: -; Owner: augur --- - -CREATE SCHEMA augur_operations; - - -ALTER SCHEMA augur_operations OWNER TO augur; - --- --- Name: spdx; Type: SCHEMA; Schema: -; Owner: augur --- - -CREATE SCHEMA spdx; - - -ALTER SCHEMA spdx OWNER TO augur; - --- --- Name: toss_specific; Type: SCHEMA; Schema: -; Owner: augur --- - -CREATE SCHEMA toss_specific; - - -ALTER SCHEMA toss_specific OWNER TO augur; - --- --- Name: refresh_aggregates(); Type: PROCEDURE; Schema: augur_data; Owner: augur --- - -CREATE PROCEDURE augur_data.refresh_aggregates() - LANGUAGE plpgsql - AS $$ - begin - perform pg_advisory_lock(124); - execute 'REFRESH MATERIALIZED VIEW "augur_data"."issue_reporter_created_at"'; - perform pg_advisory_unlock(124); - end; -$$; - - -ALTER PROCEDURE augur_data.refresh_aggregates() OWNER TO augur; - --- --- Name: create_constraint_if_not_exists(text, text, text); Type: FUNCTION; Schema: public; Owner: augur --- - -CREATE FUNCTION public.create_constraint_if_not_exists(t_name text, c_name text, constraint_sql text) 
RETURNS void - LANGUAGE plpgsql - AS $$ - BEGIN - -- Look for our constraint - IF NOT EXISTS (SELECT constraint_name - FROM information_schema.constraint_column_usage - WHERE constraint_name = c_name) THEN - EXECUTE 'ALTER TABLE ' || t_name || ' ADD CONSTRAINT ' || c_name || ' ' || constraint_sql; - END IF; - END; -$$; - - -ALTER FUNCTION public.create_constraint_if_not_exists(t_name text, c_name text, constraint_sql text) OWNER TO augur; - --- --- Name: pc_chartoint(character varying); Type: FUNCTION; Schema: public; Owner: augur --- - -CREATE FUNCTION public.pc_chartoint(chartoconvert character varying) RETURNS integer - LANGUAGE sql IMMUTABLE STRICT - AS $_$ -SELECT CASE WHEN trim($1) SIMILAR TO '[0-9]+' - THEN CAST(trim($1) AS integer) - ELSE NULL END; - -$_$; - - -ALTER FUNCTION public.pc_chartoint(chartoconvert character varying) OWNER TO augur; - --- --- Name: refresh_aggregates(); Type: PROCEDURE; Schema: public; Owner: augur --- - -CREATE PROCEDURE public.refresh_aggregates() - LANGUAGE plpgsql - AS $$ - begin - perform pg_advisory_lock(124); - execute 'REFRESH MATERIALIZED VIEW "augur_data"."issue_reporter_created_at"'; - perform pg_advisory_unlock(124); - end; -$$; - - -ALTER PROCEDURE public.refresh_aggregates() OWNER TO augur; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: analysis_log; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.analysis_log ( - repos_id integer NOT NULL, - status character varying NOT NULL, - date_attempted timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_data.analysis_log OWNER TO augur; - --- --- Name: augur_data.repo_insights_ri_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data."augur_data.repo_insights_ri_id_seq" - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data."augur_data.repo_insights_ri_id_seq" OWNER TO augur; - --- --- Name: chaoss_metric_status_cms_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.chaoss_metric_status_cms_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.chaoss_metric_status_cms_id_seq OWNER TO augur; - --- --- Name: chaoss_metric_status; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.chaoss_metric_status ( - cms_id bigint DEFAULT nextval('augur_data.chaoss_metric_status_cms_id_seq'::regclass) NOT NULL, - cm_group character varying, - cm_source character varying, - cm_type character varying, - cm_backend_status character varying, - cm_frontend_status character varying, - cm_defined boolean, - cm_api_endpoint_repo character varying, - cm_api_endpoint_rg character varying, - cm_name character varying, - cm_working_group character varying, - cm_info json, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cm_working_group_focus_area character varying -); - - -ALTER TABLE augur_data.chaoss_metric_status OWNER TO augur; - --- --- Name: TABLE chaoss_metric_status; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.chaoss_metric_status IS 'This table used to track CHAOSS Metric implementations in Augur, but due to the constantly changing location of that information, it is for the moment not actively populated. 
'; - - --- --- Name: chaoss_user; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.chaoss_user ( - chaoss_id bigint NOT NULL, - chaoss_login_name character varying, - chaoss_login_hashword character varying, - chaoss_email character varying, - chaoss_text_phone character varying, - chaoss_first_name character varying, - chaoss_last_name character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) with time zone DEFAULT now() -); - - -ALTER TABLE augur_data.chaoss_user OWNER TO augur; - --- --- Name: chaoss_user_chaoss_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.chaoss_user_chaoss_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.chaoss_user_chaoss_id_seq OWNER TO augur; - --- --- Name: chaoss_user_chaoss_id_seq; Type: SEQUENCE OWNED BY; Schema: augur_data; Owner: augur --- - -ALTER SEQUENCE augur_data.chaoss_user_chaoss_id_seq OWNED BY augur_data.chaoss_user.chaoss_id; - - --- --- Name: commit_comment_ref_cmt_comment_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.commit_comment_ref_cmt_comment_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.commit_comment_ref_cmt_comment_id_seq OWNER TO augur; - --- --- Name: commit_comment_ref; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.commit_comment_ref ( - cmt_comment_id bigint DEFAULT nextval('augur_data.commit_comment_ref_cmt_comment_id_seq'::regclass) NOT NULL, - cmt_id bigint NOT NULL, - repo_id bigint, - msg_id bigint NOT NULL, - user_id bigint NOT NULL, - body text, - line bigint, - "position" bigint, - commit_comment_src_node_id character varying, - cmt_comment_src_id bigint NOT NULL, - created_at timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.commit_comment_ref OWNER TO augur; - --- --- Name: COLUMN commit_comment_ref.commit_comment_src_node_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.commit_comment_ref.commit_comment_src_node_id IS 'For data provenance, we store the source node ID if it exists. '; - - --- --- Name: COLUMN commit_comment_ref.cmt_comment_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.commit_comment_ref.cmt_comment_src_id IS 'For data provenance, we store the source ID if it exists. 
'; - - --- --- Name: commit_parents_parent_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.commit_parents_parent_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.commit_parents_parent_id_seq OWNER TO augur; - --- --- Name: commit_parents; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.commit_parents ( - cmt_id bigint NOT NULL, - parent_id bigint DEFAULT nextval('augur_data.commit_parents_parent_id_seq'::regclass) NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.commit_parents OWNER TO augur; - --- --- Name: commits_cmt_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.commits_cmt_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.commits_cmt_id_seq OWNER TO augur; - --- --- Name: commits; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.commits ( - cmt_id bigint DEFAULT nextval('augur_data.commits_cmt_id_seq'::regclass) NOT NULL, - repo_id bigint NOT NULL, - cmt_commit_hash character varying(80) NOT NULL, - cmt_author_name character varying NOT NULL, - cmt_author_raw_email character varying NOT NULL, - cmt_author_email character varying NOT NULL, - cmt_author_date character varying(10) NOT NULL, - cmt_author_affiliation character varying DEFAULT 'NULL'::character varying, - cmt_committer_name character varying NOT NULL, - cmt_committer_raw_email character varying NOT NULL, - cmt_committer_email character varying NOT NULL, - cmt_committer_date character varying NOT NULL, - cmt_committer_affiliation character varying DEFAULT 'NULL'::character varying, - cmt_added integer NOT NULL, - cmt_removed integer NOT NULL, - cmt_whitespace integer NOT NULL, - cmt_filename character varying NOT NULL, - cmt_date_attempted timestamp(0) without time zone NOT NULL, - cmt_ght_committer_id integer, - cmt_ght_committed_at timestamp(0) without time zone, - cmt_committer_timestamp timestamp(0) with time zone, - cmt_author_timestamp timestamp(0) with time zone, - cmt_author_platform_username character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cmt_ght_author_id uuid -); - - -ALTER TABLE augur_data.commits OWNER TO augur; - --- --- Name: TABLE commits; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.commits IS 'Commits. -Each row represents changes to one FILE within a single commit. So you will encounter multiple rows per commit hash in many cases. 
'; - - --- --- Name: contributor_affiliations_ca_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributor_affiliations_ca_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributor_affiliations_ca_id_seq OWNER TO augur; - --- --- Name: contributor_affiliations; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.contributor_affiliations ( - ca_id bigint DEFAULT nextval('augur_data.contributor_affiliations_ca_id_seq'::regclass) NOT NULL, - ca_domain character varying(64) NOT NULL, - ca_start_date date DEFAULT '1970-01-01'::date, - ca_last_used timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - ca_affiliation character varying, - ca_active smallint DEFAULT 1, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.contributor_affiliations OWNER TO augur; - --- --- Name: TABLE contributor_affiliations; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.contributor_affiliations IS 'This table exists outside of relations with other tables. The purpose is to provide a dynamic, owner maintained (and augur augmented) list of affiliations. This table is processed in affiliation information in the DM_ tables generated when Augur is finished counting commits using the Facade Worker. '; - - --- --- Name: contributor_repo_cntrb_repo_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributor_repo_cntrb_repo_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributor_repo_cntrb_repo_id_seq OWNER TO augur; - --- --- Name: contributor_repo; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.contributor_repo ( - cntrb_repo_id bigint DEFAULT nextval('augur_data.contributor_repo_cntrb_repo_id_seq'::regclass) NOT NULL, - repo_git character varying NOT NULL, - repo_name character varying NOT NULL, - gh_repo_id bigint NOT NULL, - cntrb_category character varying, - event_id bigint, - created_at timestamp(0) without time zone, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid NOT NULL -); - - -ALTER TABLE augur_data.contributor_repo OWNER TO augur; - --- --- Name: TABLE contributor_repo; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.contributor_repo IS 'Developed in Partnership with Andrew Brain. 
-From: [ - { - "login": "octocat", - "id": 1, - "node_id": "MDQ6VXNlcjE=", - "avatar_url": "https://github.com/images/error/octocat_happy.gif", - "gravatar_id": "", - "url": "https://api.github.com/users/octocat", - "html_url": "https://github.com/octocat", - "followers_url": "https://api.github.com/users/octocat/followers", - "following_url": "https://api.github.com/users/octocat/following{/other_user}", - "gists_url": "https://api.github.com/users/octocat/gists{/gist_id}", - "starred_url": "https://api.github.com/users/octocat/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/octocat/subscriptions", - "organizations_url": "https://api.github.com/users/octocat/orgs", - "repos_url": "https://api.github.com/users/octocat/repos", - "events_url": "https://api.github.com/users/octocat/events{/privacy}", - "received_events_url": "https://api.github.com/users/octocat/received_events", - "type": "User", - "site_admin": false - } -] -'; - - --- --- Name: COLUMN contributor_repo.repo_git; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributor_repo.repo_git IS 'Similar to cntrb_id, we need this data for the table to have meaningful data. '; - - --- --- Name: COLUMN contributor_repo.cntrb_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributor_repo.cntrb_id IS 'This is not null because what is the point without the contributor in this table? '; - - --- --- Name: contributors; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.contributors ( - cntrb_login character varying, - cntrb_email character varying, - cntrb_full_name character varying, - cntrb_company character varying, - cntrb_created_at timestamp(0) without time zone, - cntrb_type character varying, - cntrb_fake smallint DEFAULT 0, - cntrb_deleted smallint DEFAULT 0, - cntrb_long numeric(11,8) DEFAULT NULL::numeric, - cntrb_lat numeric(10,8) DEFAULT NULL::numeric, - cntrb_country_code character(3) DEFAULT NULL::bpchar, - cntrb_state character varying, - cntrb_city character varying, - cntrb_location character varying, - cntrb_canonical character varying, - cntrb_last_used timestamp(0) with time zone DEFAULT NULL::timestamp with time zone, - gh_user_id bigint, - gh_login character varying, - gh_url character varying, - gh_html_url character varying, - gh_node_id character varying, - gh_avatar_url character varying, - gh_gravatar_id character varying, - gh_followers_url character varying, - gh_following_url character varying, - gh_gists_url character varying, - gh_starred_url character varying, - gh_subscriptions_url character varying, - gh_organizations_url character varying, - gh_repos_url character varying, - gh_events_url character varying, - gh_received_events_url character varying, - gh_type character varying, - gh_site_admin character varying, - gl_web_url character varying, - gl_avatar_url character varying, - gl_state character varying, - gl_username character varying, - gl_full_name character varying, - gl_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid NOT NULL -); - - -ALTER TABLE augur_data.contributors OWNER TO augur; - --- --- Name: TABLE contributors; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.contributors IS 'For GitHub, this should be repeated from gh_login. 
for other systems, it should be that systems login. -Github now allows a user to change their login name, but their user id remains the same in this case. So, the natural key is the combination of id and login, but there should never be repeated logins. '; - - --- --- Name: COLUMN contributors.cntrb_login; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.cntrb_login IS 'Will be a double population with the same value as gh_login for github, but the local value for other systems. '; - - --- --- Name: COLUMN contributors.cntrb_email; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.cntrb_email IS 'This needs to be here for matching contributor ids, which are augur, to the commit information. '; - - --- --- Name: COLUMN contributors.cntrb_type; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.cntrb_type IS 'Present in another models. It is not currently used in Augur. '; - - --- --- Name: COLUMN contributors.gh_login; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gh_login IS 'populated with the github user name for github originated data. '; - - --- --- Name: COLUMN contributors.gl_web_url; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_web_url IS '“web_url” value from these API calls to GitLab, all for the same user - -https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: COLUMN contributors.gl_avatar_url; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_avatar_url IS '“avatar_url” value from these API calls to GitLab, all for the same user - -https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: COLUMN contributors.gl_state; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_state IS '“state” value from these API calls to GitLab, all for the same user - -https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: COLUMN contributors.gl_username; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_username IS '“username” value from these API calls to GitLab, all for the same user - 
-https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: COLUMN contributors.gl_full_name; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_full_name IS '“name” value from these API calls to GitLab, all for the same user - -https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: COLUMN contributors.gl_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_id IS '"id" value from these API calls to GitLab, all for the same user - -https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: contributors_aliases_cntrb_alias_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributors_aliases_cntrb_alias_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributors_aliases_cntrb_alias_id_seq OWNER TO augur; - --- --- Name: contributors_aliases; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.contributors_aliases ( - cntrb_alias_id bigint DEFAULT nextval('augur_data.contributors_aliases_cntrb_alias_id_seq'::regclass) NOT NULL, - canonical_email character varying NOT NULL, - alias_email character varying NOT NULL, - cntrb_active smallint DEFAULT 1 NOT NULL, - cntrb_last_modified timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid NOT NULL -); - - -ALTER TABLE augur_data.contributors_aliases OWNER TO augur; - --- --- Name: TABLE contributors_aliases; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.contributors_aliases IS 'Every open source user may have more than one email used to make contributions over time. Augur selects the first email it encounters for a user as its “canonical_email”. - -The canonical_email is also added to the contributors_aliases table, with the canonical_email and alias_email being identical. 
Using this strategy, an email search will only need to join the alias table for basic email information, and can then more easily map the canonical email from each alias row to the same, more detailed information in the contributors table for a user. '; - - --- --- Name: contributors_aliases_cntrb_a_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributors_aliases_cntrb_a_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributors_aliases_cntrb_a_id_seq OWNER TO augur; - --- --- Name: contributors_cntrb_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributors_cntrb_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributors_cntrb_id_seq OWNER TO augur; - --- --- Name: contributors_history_cntrb_history_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributors_history_cntrb_history_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributors_history_cntrb_history_id_seq OWNER TO augur; - --- --- Name: discourse_insights_msg_discourse_id_seq1; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.discourse_insights_msg_discourse_id_seq1 - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.discourse_insights_msg_discourse_id_seq1 OWNER TO augur; - --- --- Name: discourse_insights; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.discourse_insights ( - msg_discourse_id bigint DEFAULT nextval('augur_data.discourse_insights_msg_discourse_id_seq1'::regclass) NOT NULL, - msg_id bigint, - discourse_act character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.discourse_insights OWNER TO augur; - --- --- Name: TABLE discourse_insights; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.discourse_insights IS 'This table is populated by the “Discourse_Analysis_Worker”. It examines sequential discourse, using computational linguistic methods, to draw statistical inferences regarding the discourse in a particular comment thread. 
'; - - --- --- Name: discourse_insights_msg_discourse_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.discourse_insights_msg_discourse_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.discourse_insights_msg_discourse_id_seq OWNER TO augur; - --- --- Name: dm_repo_annual; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_annual ( - repo_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_annual OWNER TO augur; - --- --- Name: dm_repo_group_annual; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_group_annual ( - repo_group_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_group_annual OWNER TO augur; - --- --- Name: dm_repo_group_monthly; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_group_monthly ( - repo_group_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - month smallint NOT NULL, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_group_monthly OWNER TO augur; - --- --- Name: dm_repo_group_weekly; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_group_weekly ( - repo_group_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - week smallint NOT NULL, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_group_weekly OWNER TO augur; - --- --- Name: dm_repo_monthly; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_monthly ( - repo_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - month smallint NOT NULL, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - 
tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_monthly OWNER TO augur; - --- --- Name: dm_repo_weekly; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_weekly ( - repo_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - week smallint NOT NULL, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_weekly OWNER TO augur; - --- --- Name: exclude; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.exclude ( - id integer NOT NULL, - projects_id integer NOT NULL, - email character varying DEFAULT 'NULL'::character varying, - domain character varying DEFAULT 'NULL'::character varying -); - - -ALTER TABLE augur_data.exclude OWNER TO augur; - --- --- Name: issue_assignees_issue_assignee_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.issue_assignees_issue_assignee_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.issue_assignees_issue_assignee_id_seq OWNER TO augur; - --- --- Name: issue_assignees; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.issue_assignees ( - issue_assignee_id bigint DEFAULT nextval('augur_data.issue_assignees_issue_assignee_id_seq'::regclass) NOT NULL, - issue_id bigint, - repo_id bigint, - issue_assignee_src_id bigint, - issue_assignee_src_node character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.issue_assignees OWNER TO augur; - --- --- Name: COLUMN issue_assignees.issue_assignee_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_assignees.issue_assignee_src_id IS 'This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue events API in the issue_assignees embedded JSON object. We may discover it is an ID for the person themselves; but my hypothesis is that its not.'; - - --- --- Name: COLUMN issue_assignees.issue_assignee_src_node; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_assignees.issue_assignee_src_node IS 'This character based identifier comes from the source. In the case of GitHub, it is the id that is the second field returned from the issue events API in the issue_assignees embedded JSON object. 
We may discover it is an ID for the person themselves; but my hypothesis is that its not.'; - - --- --- Name: issue_events_event_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.issue_events_event_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.issue_events_event_id_seq OWNER TO augur; - --- --- Name: issue_events; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.issue_events ( - event_id bigint DEFAULT nextval('augur_data.issue_events_event_id_seq'::regclass) NOT NULL, - issue_id bigint NOT NULL, - repo_id bigint, - action character varying NOT NULL, - action_commit_hash character varying, - created_at timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - node_id character varying, - node_url character varying, - platform_id bigint NOT NULL, - issue_event_src_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.issue_events OWNER TO augur; - --- --- Name: COLUMN issue_events.node_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_events.node_id IS 'This should be renamed to issue_event_src_node_id, as its the varchar identifier in GitHub and likely common in other sources as well. However, since it was created before we came to this naming standard and workers are built around it, we have it simply named as node_id. Anywhere you see node_id in the schema, it comes from GitHubs terminology.'; - - --- --- Name: COLUMN issue_events.issue_event_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_events.issue_event_src_id IS 'This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue events API'; - - --- --- Name: issue_labels_issue_label_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.issue_labels_issue_label_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.issue_labels_issue_label_id_seq OWNER TO augur; - --- --- Name: issue_labels; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.issue_labels ( - issue_label_id bigint DEFAULT nextval('augur_data.issue_labels_issue_label_id_seq'::regclass) NOT NULL, - issue_id bigint, - repo_id bigint, - label_text character varying, - label_description character varying, - label_color character varying, - label_src_id bigint, - label_src_node_id character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.issue_labels OWNER TO augur; - --- --- Name: COLUMN issue_labels.label_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_labels.label_src_id IS 'This character based identifier (node) comes from the source. 
In the case of GitHub, it is the id that is the second field returned from the issue events API JSON subsection for issues.'; - - --- --- Name: issue_message_ref_issue_msg_ref_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.issue_message_ref_issue_msg_ref_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.issue_message_ref_issue_msg_ref_id_seq OWNER TO augur; - --- --- Name: issue_message_ref; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.issue_message_ref ( - issue_msg_ref_id bigint DEFAULT nextval('augur_data.issue_message_ref_issue_msg_ref_id_seq'::regclass) NOT NULL, - issue_id bigint, - repo_id bigint, - msg_id bigint, - issue_msg_ref_src_node_id character varying, - issue_msg_ref_src_comment_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.issue_message_ref OWNER TO augur; - --- --- Name: COLUMN issue_message_ref.issue_msg_ref_src_node_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_message_ref.issue_msg_ref_src_node_id IS 'This character based identifier comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue comments API'; - - --- --- Name: COLUMN issue_message_ref.issue_msg_ref_src_comment_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_message_ref.issue_msg_ref_src_comment_id IS 'This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue comments API'; - - --- --- Name: issue_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.issue_seq - START WITH 31000 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.issue_seq OWNER TO augur; - --- --- Name: issues; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.issues ( - issue_id bigint DEFAULT nextval('augur_data.issue_seq'::regclass) NOT NULL, - repo_id bigint, - pull_request bigint, - pull_request_id bigint, - created_at timestamp(0) without time zone, - issue_title character varying, - issue_body character varying, - comment_count bigint, - updated_at timestamp(0) without time zone, - closed_at timestamp(0) without time zone, - due_on timestamp(0) without time zone, - repository_url character varying, - issue_url character varying, - labels_url character varying, - comments_url character varying, - events_url character varying, - html_url character varying, - issue_state character varying, - issue_node_id character varying, - gh_issue_number bigint, - gh_issue_id bigint, - gh_user_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - reporter_id uuid, - cntrb_id uuid -); - - -ALTER TABLE augur_data.issues OWNER TO augur; - --- --- Name: COLUMN issues.reporter_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issues.reporter_id IS 'The ID of the person who opened the issue. '; - - --- --- Name: COLUMN issues.cntrb_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issues.cntrb_id IS 'The ID of the person who closed the issue. 
'; - - --- --- Name: libraries_library_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.libraries_library_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.libraries_library_id_seq OWNER TO augur; - --- --- Name: libraries; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.libraries ( - library_id bigint DEFAULT nextval('augur_data.libraries_library_id_seq'::regclass) NOT NULL, - repo_id bigint, - platform character varying, - name character varying, - created_timestamp timestamp(0) without time zone DEFAULT NULL::timestamp without time zone, - updated_timestamp timestamp(0) without time zone DEFAULT NULL::timestamp without time zone, - library_description character varying(2000) DEFAULT NULL::character varying, - keywords character varying, - library_homepage character varying(1000) DEFAULT NULL::character varying, - license character varying, - version_count integer, - latest_release_timestamp character varying, - latest_release_number character varying, - package_manager_id character varying, - dependency_count integer, - dependent_library_count integer, - primary_language character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.libraries OWNER TO augur; - --- --- Name: library_dependencies_lib_dependency_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.library_dependencies_lib_dependency_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.library_dependencies_lib_dependency_id_seq OWNER TO augur; - --- --- Name: library_dependencies; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.library_dependencies ( - lib_dependency_id bigint DEFAULT nextval('augur_data.library_dependencies_lib_dependency_id_seq'::regclass) NOT NULL, - library_id bigint, - manifest_platform character varying, - manifest_filepath character varying(1000) DEFAULT NULL::character varying, - manifest_kind character varying, - repo_id_branch character varying NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.library_dependencies OWNER TO augur; - --- --- Name: library_version_library_version_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.library_version_library_version_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.library_version_library_version_id_seq OWNER TO augur; - --- --- Name: library_version; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.library_version ( - library_version_id bigint DEFAULT nextval('augur_data.library_version_library_version_id_seq'::regclass) NOT NULL, - library_id bigint, - library_platform character varying, - version_number character varying, - version_release_date timestamp(0) without time zone DEFAULT NULL::timestamp without time zone, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.library_version OWNER TO augur; - --- --- Name: lstm_anomaly_models_model_id_seq; Type: 
SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.lstm_anomaly_models_model_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.lstm_anomaly_models_model_id_seq OWNER TO augur; - --- --- Name: lstm_anomaly_models; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.lstm_anomaly_models ( - model_id bigint DEFAULT nextval('augur_data.lstm_anomaly_models_model_id_seq'::regclass) NOT NULL, - model_name character varying, - model_description character varying, - look_back_days bigint, - training_days bigint, - batch_size bigint, - metric character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.lstm_anomaly_models OWNER TO augur; - --- --- Name: lstm_anomaly_results_result_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.lstm_anomaly_results_result_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.lstm_anomaly_results_result_id_seq OWNER TO augur; - --- --- Name: lstm_anomaly_results; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.lstm_anomaly_results ( - result_id bigint DEFAULT nextval('augur_data.lstm_anomaly_results_result_id_seq'::regclass) NOT NULL, - repo_id bigint, - repo_category character varying, - model_id bigint, - metric character varying, - contamination_factor double precision, - mean_absolute_error double precision, - remarks character varying, - metric_field character varying, - mean_absolute_actual_value double precision, - mean_absolute_prediction_value double precision, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.lstm_anomaly_results OWNER TO augur; - --- --- Name: COLUMN lstm_anomaly_results.metric_field; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.lstm_anomaly_results.metric_field IS 'This is a listing of all of the endpoint fields included in the generation of the metric. Sometimes there is one, sometimes there is more than one. This will list them all. 
'; - - --- --- Name: message_msg_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.message_msg_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.message_msg_id_seq OWNER TO augur; - --- --- Name: message; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.message ( - msg_id bigint DEFAULT nextval('augur_data.message_msg_id_seq'::regclass) NOT NULL, - rgls_id bigint, - platform_msg_id bigint, - platform_node_id character varying, - repo_id bigint, - msg_text character varying, - msg_timestamp timestamp(0) without time zone, - msg_sender_email character varying, - msg_header character varying, - pltfrm_id bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.message OWNER TO augur; - --- --- Name: COLUMN message.cntrb_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message.cntrb_id IS 'Not populated for mailing lists. Populated for GitHub issues. '; - - --- --- Name: message_analysis_msg_analysis_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.message_analysis_msg_analysis_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.message_analysis_msg_analysis_id_seq OWNER TO augur; - --- --- Name: message_analysis; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.message_analysis ( - msg_analysis_id bigint DEFAULT nextval('augur_data.message_analysis_msg_analysis_id_seq'::regclass) NOT NULL, - msg_id bigint, - worker_run_id bigint, - sentiment_score double precision, - reconstruction_error double precision, - novelty_flag boolean, - feedback_flag boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.message_analysis OWNER TO augur; - --- --- Name: COLUMN message_analysis.worker_run_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis.worker_run_id IS 'This column is used to indicate analyses run by a worker during the same execution period, and is useful for grouping, and time series analysis. '; - - --- --- Name: COLUMN message_analysis.sentiment_score; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis.sentiment_score IS 'A sentiment analysis score. Zero is neutral, negative numbers are negative sentiment, and positive numbers are positive sentiment. '; - - --- --- Name: COLUMN message_analysis.reconstruction_error; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis.reconstruction_error IS 'Each message is converted to a 250 dimensin doc2vec vector, so the reconstruction error is the difference between what the predicted vector and the actual vector.'; - - --- --- Name: COLUMN message_analysis.novelty_flag; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis.novelty_flag IS 'This is an analysis of the degree to which the message is novel when compared to other messages in a repository. 
For example when bots are producing numerous identical messages, the novelty score is low. It would also be a low novelty score when several people are making the same coment. '; - - --- --- Name: COLUMN message_analysis.feedback_flag; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis.feedback_flag IS 'This exists to provide the user with an opportunity provide feedback on the resulting the sentiment scores. '; - - --- --- Name: message_analysis_summary_msg_summary_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.message_analysis_summary_msg_summary_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.message_analysis_summary_msg_summary_id_seq OWNER TO augur; - --- --- Name: message_analysis_summary; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.message_analysis_summary ( - msg_summary_id bigint DEFAULT nextval('augur_data.message_analysis_summary_msg_summary_id_seq'::regclass) NOT NULL, - repo_id bigint, - worker_run_id bigint, - positive_ratio double precision, - negative_ratio double precision, - novel_count bigint, - period timestamp(0) without time zone, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.message_analysis_summary OWNER TO augur; - --- --- Name: TABLE message_analysis_summary; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.message_analysis_summary IS 'In a relationally perfect world, we would have a table called “message_analysis_run” the incremented the “worker_run_id” for both message_analysis and message_analysis_summary. For now, we decided this was overkill. '; - - --- --- Name: COLUMN message_analysis_summary.worker_run_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis_summary.worker_run_id IS 'This value should reflect the worker_run_id for the messages summarized in the table. There is not a relation between these two tables for that purpose because its not *really*, relationaly a concept unless we create a third table for "worker_run_id", which we determined was unnecessarily complex. '; - - --- --- Name: COLUMN message_analysis_summary.novel_count; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis_summary.novel_count IS 'The number of messages identified as novel during the analyzed period'; - - --- --- Name: COLUMN message_analysis_summary.period; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis_summary.period IS 'The whole timeline is divided into periods based on the definition of time period for analysis, which is user specified. Timestamp of the first period to look at, until the end of messages at the data of execution. 
'; - - --- --- Name: message_sentiment_msg_analysis_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.message_sentiment_msg_analysis_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.message_sentiment_msg_analysis_id_seq OWNER TO augur; - --- --- Name: message_sentiment; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.message_sentiment ( - msg_analysis_id bigint DEFAULT nextval('augur_data.message_sentiment_msg_analysis_id_seq'::regclass) NOT NULL, - msg_id bigint, - worker_run_id bigint, - sentiment_score double precision, - reconstruction_error double precision, - novelty_flag boolean, - feedback_flag boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.message_sentiment OWNER TO augur; - --- --- Name: COLUMN message_sentiment.worker_run_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment.worker_run_id IS 'This column is used to indicate analyses run by a worker during the same execution period, and is useful for grouping, and time series analysis. '; - - --- --- Name: COLUMN message_sentiment.sentiment_score; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment.sentiment_score IS 'A sentiment analysis score. Zero is neutral, negative numbers are negative sentiment, and positive numbers are positive sentiment. '; - - --- --- Name: COLUMN message_sentiment.reconstruction_error; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment.reconstruction_error IS 'Each message is converted to a 250 dimensin doc2vec vector, so the reconstruction error is the difference between what the predicted vector and the actual vector.'; - - --- --- Name: COLUMN message_sentiment.novelty_flag; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment.novelty_flag IS 'This is an analysis of the degree to which the message is novel when compared to other messages in a repository. For example when bots are producing numerous identical messages, the novelty score is low. It would also be a low novelty score when several people are making the same coment. '; - - --- --- Name: COLUMN message_sentiment.feedback_flag; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment.feedback_flag IS 'This exists to provide the user with an opportunity provide feedback on the resulting the sentiment scores. 
'; - - --- --- Name: message_sentiment_summary_msg_summary_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.message_sentiment_summary_msg_summary_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.message_sentiment_summary_msg_summary_id_seq OWNER TO augur; - --- --- Name: message_sentiment_summary; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.message_sentiment_summary ( - msg_summary_id bigint DEFAULT nextval('augur_data.message_sentiment_summary_msg_summary_id_seq'::regclass) NOT NULL, - repo_id bigint, - worker_run_id bigint, - positive_ratio double precision, - negative_ratio double precision, - novel_count bigint, - period timestamp(0) without time zone, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.message_sentiment_summary OWNER TO augur; - --- --- Name: TABLE message_sentiment_summary; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.message_sentiment_summary IS 'In a relationally perfect world, we would have a table called “message_sentiment_run” the incremented the “worker_run_id” for both message_sentiment and message_sentiment_summary. For now, we decided this was overkill. '; - - --- --- Name: COLUMN message_sentiment_summary.worker_run_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment_summary.worker_run_id IS 'This value should reflect the worker_run_id for the messages summarized in the table. There is not a relation between these two tables for that purpose because its not *really*, relationaly a concept unless we create a third table for "worker_run_id", which we determined was unnecessarily complex. '; - - --- --- Name: COLUMN message_sentiment_summary.novel_count; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment_summary.novel_count IS 'The number of messages identified as novel during the analyzed period'; - - --- --- Name: COLUMN message_sentiment_summary.period; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment_summary.period IS 'The whole timeline is divided into periods based on the definition of time period for analysis, which is user specified. Timestamp of the first period to look at, until the end of messages at the data of execution. 
'; - - --- --- Name: platform_pltfrm_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.platform_pltfrm_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.platform_pltfrm_id_seq OWNER TO augur; - --- --- Name: platform; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.platform ( - pltfrm_id bigint DEFAULT nextval('augur_data.platform_pltfrm_id_seq'::regclass) NOT NULL, - pltfrm_name character varying, - pltfrm_version character varying, - pltfrm_release_date date, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.platform OWNER TO augur; - --- --- Name: pull_request_analysis_pull_request_analysis_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_analysis_pull_request_analysis_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_analysis_pull_request_analysis_id_seq OWNER TO augur; - --- --- Name: pull_request_analysis; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_analysis ( - pull_request_analysis_id bigint DEFAULT nextval('augur_data.pull_request_analysis_pull_request_analysis_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - merge_probability numeric(256,250), - mechanism character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_data.pull_request_analysis OWNER TO augur; - --- --- Name: COLUMN pull_request_analysis.pull_request_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_analysis.pull_request_id IS 'It would be better if the pull request worker is run first to fetch the latest PRs before analyzing'; - - --- --- Name: COLUMN pull_request_analysis.merge_probability; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_analysis.merge_probability IS 'Indicates the probability of the PR being merged'; - - --- --- Name: COLUMN pull_request_analysis.mechanism; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_analysis.mechanism IS 'the ML model used for prediction (It is XGBoost Classifier at present)'; - - --- --- Name: pull_request_assignees_pr_assignee_map_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_assignees_pr_assignee_map_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_assignees_pr_assignee_map_id_seq OWNER TO augur; - --- --- Name: pull_request_assignees; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_assignees ( - pr_assignee_map_id bigint DEFAULT nextval('augur_data.pull_request_assignees_pr_assignee_map_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - pr_assignee_src_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - contrib_id uuid -); - - -ALTER TABLE augur_data.pull_request_assignees OWNER TO augur; 
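Editor's note: the pull_request_analysis columns defined above (merge_probability, mechanism) are easiest to understand with a concrete read query. The sketch below is illustrative only and is not part of the dumped schema or of this patch; it assumes the pull_requests table defined later in this file and assumes GitHub-style state strings such as 'open' in pr_src_state.

-- Illustrative query: open pull requests ranked by the predicted merge probability
-- stored in pull_request_analysis (the model used is recorded in the "mechanism" column).
SELECT pr.pull_request_id,
       pr.pr_src_number,
       pra.merge_probability,
       pra.mechanism
FROM augur_data.pull_requests pr
JOIN augur_data.pull_request_analysis pra
  ON pra.pull_request_id = pr.pull_request_id
WHERE pr.pr_src_state = 'open'      -- assumed GitHub-style state value
ORDER BY pra.merge_probability DESC
LIMIT 20;
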
- --- --- Name: pull_request_commits_pr_cmt_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_commits_pr_cmt_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_commits_pr_cmt_id_seq OWNER TO augur; - --- --- Name: pull_request_commits; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_commits ( - pr_cmt_id bigint DEFAULT nextval('augur_data.pull_request_commits_pr_cmt_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - pr_cmt_sha character varying, - pr_cmt_node_id character varying, - pr_cmt_message character varying, - pr_cmt_comments_url character varying, - pr_cmt_timestamp timestamp(0) without time zone, - pr_cmt_author_email character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - pr_cmt_author_cntrb_id uuid -); - - -ALTER TABLE augur_data.pull_request_commits OWNER TO augur; - --- --- Name: TABLE pull_request_commits; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.pull_request_commits IS 'Pull request commits are an enumeration of each commit associated with a pull request. -Not all pull requests are from a branch or fork into master. -The commits table intends to count only commits that end up in the master branch (i.e., part of the deployed code base for a project). -Therefore, there will be commit “SHA”’s in this table that are no associated with a commit SHA in the commits table. -In cases where the PR is to the master branch of a project, you will find a match. In cases where the PR does not involve the master branch, you will not find a corresponding commit SHA in the commits table. This is expected. '; - - --- --- Name: COLUMN pull_request_commits.pr_cmt_sha; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_commits.pr_cmt_sha IS 'This is the commit SHA for a pull request commit. If the PR is not to the master branch of the main repository (or, in rare cases, from it), then you will NOT find a corresponding commit SHA in the commit table. (see table comment for further explanation). 
'; - - --- --- Name: pull_request_events_pr_event_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_events_pr_event_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_events_pr_event_id_seq OWNER TO augur; - --- --- Name: pull_request_events; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_events ( - pr_event_id bigint DEFAULT nextval('augur_data.pull_request_events_pr_event_id_seq'::regclass) NOT NULL, - pull_request_id bigint NOT NULL, - repo_id bigint, - action character varying NOT NULL, - action_commit_hash character varying, - created_at timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - issue_event_src_id bigint, - node_id character varying, - node_url character varying, - platform_id bigint DEFAULT 25150 NOT NULL, - pr_platform_event_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.pull_request_events OWNER TO augur; - --- --- Name: COLUMN pull_request_events.issue_event_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_events.issue_event_src_id IS 'This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue events API'; - - --- --- Name: COLUMN pull_request_events.node_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_events.node_id IS 'This should be renamed to issue_event_src_node_id, as its the varchar identifier in GitHub and likely common in other sources as well. However, since it was created before we came to this naming standard and workers are built around it, we have it simply named as node_id. Anywhere you see node_id in the schema, it comes from GitHubs terminology.'; - - --- --- Name: pull_request_files_pr_file_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_files_pr_file_id_seq - START WITH 25150 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_files_pr_file_id_seq OWNER TO augur; - --- --- Name: pull_request_files; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_files ( - pr_file_id bigint DEFAULT nextval('augur_data.pull_request_files_pr_file_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - pr_file_additions bigint, - pr_file_deletions bigint, - pr_file_path character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.pull_request_files OWNER TO augur; - --- --- Name: TABLE pull_request_files; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.pull_request_files IS 'Pull request commits are an enumeration of each commit associated with a pull request. -Not all pull requests are from a branch or fork into master. -The commits table intends to count only commits that end up in the master branch (i.e., part of the deployed code base for a project). -Therefore, there will be commit “SHA”’s in this table that are no associated with a commit SHA in the commits table. 
-In cases where the PR is to the master branch of a project, you will find a match. In cases where the PR does not involve the master branch, you will not find a corresponding commit SHA in the commits table. This is expected. '; - - --- --- Name: pull_request_labels_pr_label_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_labels_pr_label_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_labels_pr_label_id_seq OWNER TO augur; - --- --- Name: pull_request_labels; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_labels ( - pr_label_id bigint DEFAULT nextval('augur_data.pull_request_labels_pr_label_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - pr_src_id bigint, - pr_src_node_id character varying, - pr_src_url character varying, - pr_src_description character varying, - pr_src_color character varying, - pr_src_default_bool boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.pull_request_labels OWNER TO augur; - --- --- Name: pull_request_message_ref_pr_msg_ref_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_message_ref_pr_msg_ref_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_message_ref_pr_msg_ref_id_seq OWNER TO augur; - --- --- Name: pull_request_message_ref; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_message_ref ( - pr_msg_ref_id bigint DEFAULT nextval('augur_data.pull_request_message_ref_pr_msg_ref_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - msg_id bigint, - pr_message_ref_src_comment_id bigint, - pr_message_ref_src_node_id character varying, - pr_issue_url character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.pull_request_message_ref OWNER TO augur; - --- --- Name: pull_request_meta_pr_repo_meta_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_meta_pr_repo_meta_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_meta_pr_repo_meta_id_seq OWNER TO augur; - --- --- Name: pull_request_meta; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_meta ( - pr_repo_meta_id bigint DEFAULT nextval('augur_data.pull_request_meta_pr_repo_meta_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - pr_head_or_base character varying, - pr_src_meta_label character varying, - pr_src_meta_ref character varying, - pr_sha character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.pull_request_meta OWNER TO augur; - --- --- Name: TABLE pull_request_meta; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.pull_request_meta IS 'Pull requests contain referencing metadata. 
There are a few columns that are discrete. There are also head and base designations for the repo on each side of the pull request. Similar functions exist in GitLab, though the language here is based on GitHub. The JSON Being adapted to as of the development of this schema is here: "base": { "label": "chaoss:dev", "ref": "dev", "sha": "dc6c6f3947f7dc84ecba3d8bda641ef786e7027d", "user": { "login": "chaoss", "id": 29740296, "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", "gravatar_id": "", "url": "https://api.github.com/users/chaoss", "html_url": "https://github.com/chaoss", "followers_url": "https://api.github.com/users/chaoss/followers", "following_url": "https://api.github.com/users/chaoss/following{/other_user}", "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", "organizations_url": "https://api.github.com/users/chaoss/orgs", "repos_url": "https://api.github.com/users/chaoss/repos", "events_url": "https://api.github.com/users/chaoss/events{/privacy}", "received_events_url": "https://api.github.com/users/chaoss/received_events", "type": "Organization", "site_admin": false }, "repo": { "id": 78134122, "node_id": "MDEwOlJlcG9zaXRvcnk3ODEzNDEyMg==", "name": "augur", "full_name": "chaoss/augur", "private": false, "owner": { "login": "chaoss", "id": 29740296, "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", "gravatar_id": "", "url": "https://api.github.com/users/chaoss", "html_url": "https://github.com/chaoss", "followers_url": "https://api.github.com/users/chaoss/followers", "following_url": "https://api.github.com/users/chaoss/following{/other_user}", "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", "organizations_url": "https://api.github.com/users/chaoss/orgs", "repos_url": "https://api.github.com/users/chaoss/repos", "events_url": "https://api.github.com/users/chaoss/events{/privacy}", "received_events_url": "https://api.github.com/users/chaoss/received_events", "type": "Organization", "site_admin": false }, '; - - --- --- Name: COLUMN pull_request_meta.pr_head_or_base; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_meta.pr_head_or_base IS 'Each pull request should have one and only one head record; and one and only one base record. '; - - --- --- Name: COLUMN pull_request_meta.pr_src_meta_label; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_meta.pr_src_meta_label IS 'This is a representation of the repo:branch information in the pull request. Head is issueing the pull request and base is taking the pull request. 
For example: (We do not store all of this) - - "head": { - "label": "chaoss:pull-request-worker", - "ref": "pull-request-worker", - "sha": "6b380c3d6d625616f79d702612ebab6d204614f2", - "user": { - "login": "chaoss", - "id": 29740296, - "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", - "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", - "gravatar_id": "", - "url": "https://api.github.com/users/chaoss", - "html_url": "https://github.com/chaoss", - "followers_url": "https://api.github.com/users/chaoss/followers", - "following_url": "https://api.github.com/users/chaoss/following{/other_user}", - "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", - "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", - "organizations_url": "https://api.github.com/users/chaoss/orgs", - "repos_url": "https://api.github.com/users/chaoss/repos", - "events_url": "https://api.github.com/users/chaoss/events{/privacy}", - "received_events_url": "https://api.github.com/users/chaoss/received_events", - "type": "Organization", - "site_admin": false - }, - "repo": { - "id": 78134122, - "node_id": "MDEwOlJlcG9zaXRvcnk3ODEzNDEyMg==", - "name": "augur", - "full_name": "chaoss/augur", - "private": false, - "owner": { - "login": "chaoss", - "id": 29740296, - "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", - "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", - "gravatar_id": "", - "url": "https://api.github.com/users/chaoss", - "html_url": "https://github.com/chaoss", - "followers_url": "https://api.github.com/users/chaoss/followers", - "following_url": "https://api.github.com/users/chaoss/following{/other_user}", - "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", - "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", - "organizations_url": "https://api.github.com/users/chaoss/orgs", - "repos_url": "https://api.github.com/users/chaoss/repos", - "events_url": "https://api.github.com/users/chaoss/events{/privacy}", - "received_events_url": "https://api.github.com/users/chaoss/received_events", - "type": "Organization", - "site_admin": false - }, - "html_url": "https://github.com/chaoss/augur", - "description": "Python library and web service for Open Source Software Health and Sustainability metrics & data collection.", - "fork": false, - "url": "https://api.github.com/repos/chaoss/augur", - "forks_url": "https://api.github.com/repos/chaoss/augur/forks", - "keys_url": "https://api.github.com/repos/chaoss/augur/keys{/key_id}", - "collaborators_url": "https://api.github.com/repos/chaoss/augur/collaborators{/collaborator}", - "teams_url": "https://api.github.com/repos/chaoss/augur/teams", - "hooks_url": "https://api.github.com/repos/chaoss/augur/hooks", - "issue_events_url": "https://api.github.com/repos/chaoss/augur/issues/events{/number}", - "events_url": "https://api.github.com/repos/chaoss/augur/events", - "assignees_url": "https://api.github.com/repos/chaoss/augur/assignees{/user}", - "branches_url": "https://api.github.com/repos/chaoss/augur/branches{/branch}", - "tags_url": "https://api.github.com/repos/chaoss/augur/tags", - "blobs_url": "https://api.github.com/repos/chaoss/augur/git/blobs{/sha}", - "git_tags_url": "https://api.github.com/repos/chaoss/augur/git/tags{/sha}", - "git_refs_url": "https://api.github.com/repos/chaoss/augur/git/refs{/sha}", - 
"trees_url": "https://api.github.com/repos/chaoss/augur/git/trees{/sha}", - "statuses_url": "https://api.github.com/repos/chaoss/augur/statuses/{sha}", - "languages_url": "https://api.github.com/repos/chaoss/augur/languages", - "stargazers_url": "https://api.github.com/repos/chaoss/augur/stargazers", - "contributors_url": "https://api.github.com/repos/chaoss/augur/contributors", - "subscribers_url": "https://api.github.com/repos/chaoss/augur/subscribers", - "subscription_url": "https://api.github.com/repos/chaoss/augur/subscription", - "commits_url": "https://api.github.com/repos/chaoss/augur/commits{/sha}", - "git_commits_url": "https://api.github.com/repos/chaoss/augur/git/commits{/sha}", - "comments_url": "https://api.github.com/repos/chaoss/augur/comments{/number}", - "issue_comment_url": "https://api.github.com/repos/chaoss/augur/issues/comments{/number}", - "contents_url": "https://api.github.com/repos/chaoss/augur/contents/{+path}", - "compare_url": "https://api.github.com/repos/chaoss/augur/compare/{base}...{head}", - "merges_url": "https://api.github.com/repos/chaoss/augur/merges", - "archive_url": "https://api.github.com/repos/chaoss/augur/{archive_format}{/ref}", - "downloads_url": "https://api.github.com/repos/chaoss/augur/downloads", - "issues_url": "https://api.github.com/repos/chaoss/augur/issues{/number}", - "pulls_url": "https://api.github.com/repos/chaoss/augur/pulls{/number}", - "milestones_url": "https://api.github.com/repos/chaoss/augur/milestones{/number}", - "notifications_url": "https://api.github.com/repos/chaoss/augur/notifications{?since,all,participating}", - "labels_url": "https://api.github.com/repos/chaoss/augur/labels{/name}", - "releases_url": "https://api.github.com/repos/chaoss/augur/releases{/id}", - "deployments_url": "https://api.github.com/repos/chaoss/augur/deployments", - "created_at": "2017-01-05T17:34:54Z", - "updated_at": "2019-11-15T00:56:12Z", - "pushed_at": "2019-12-02T06:27:26Z", - "git_url": "git://github.com/chaoss/augur.git", - "ssh_url": "git@github.com:chaoss/augur.git", - "clone_url": "https://github.com/chaoss/augur.git", - "svn_url": "https://github.com/chaoss/augur", - "homepage": "http://augur.osshealth.io/", - "size": 82004, - "stargazers_count": 153, - "watchers_count": 153, - "language": "Python", - "has_issues": true, - "has_projects": false, - "has_downloads": true, - "has_wiki": false, - "has_pages": true, - "forks_count": 205, - "mirror_url": null, - "archived": false, - "disabled": false, - "open_issues_count": 14, - "license": { - "key": "mit", - "name": "MIT License", - "spdx_id": "MIT", - "url": "https://api.github.com/licenses/mit", - "node_id": "MDc6TGljZW5zZTEz" - }, - "forks": 205, - "open_issues": 14, - "watchers": 153, - "default_branch": "master" - } - }, - "base": { - "label": "chaoss:dev", - "ref": "dev", - "sha": "bfd2d34b51659613dd842cf83c3873f7699c2a0e", - "user": { - "login": "chaoss", - "id": 29740296, - "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", - "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", - "gravatar_id": "", - "url": "https://api.github.com/users/chaoss", - "html_url": "https://github.com/chaoss", - "followers_url": "https://api.github.com/users/chaoss/followers", - "following_url": "https://api.github.com/users/chaoss/following{/other_user}", - "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", - "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", - 
"organizations_url": "https://api.github.com/users/chaoss/orgs", - "repos_url": "https://api.github.com/users/chaoss/repos", - "events_url": "https://api.github.com/users/chaoss/events{/privacy}", - "received_events_url": "https://api.github.com/users/chaoss/received_events", - "type": "Organization", - "site_admin": false - }, - "repo": { - "id": 78134122, - "node_id": "MDEwOlJlcG9zaXRvcnk3ODEzNDEyMg==", - "name": "augur", - "full_name": "chaoss/augur", - "private": false, - "owner": { - "login": "chaoss", - "id": 29740296, - "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", - "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", - "gravatar_id": "", - "url": "https://api.github.com/users/chaoss", - "html_url": "https://github.com/chaoss", - "followers_url": "https://api.github.com/users/chaoss/followers", - "following_url": "https://api.github.com/users/chaoss/following{/other_user}", - "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", - "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", - "organizations_url": "https://api.github.com/users/chaoss/orgs", - "repos_url": "https://api.github.com/users/chaoss/repos", - "events_url": "https://api.github.com/users/chaoss/events{/privacy}", - "received_events_url": "https://api.github.com/users/chaoss/received_events", - "type": "Organization", - "site_admin": false - }, -'; - - --- --- Name: pull_request_repo_pr_repo_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_repo_pr_repo_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_repo_pr_repo_id_seq OWNER TO augur; - --- --- Name: pull_request_repo; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_repo ( - pr_repo_id bigint DEFAULT nextval('augur_data.pull_request_repo_pr_repo_id_seq'::regclass) NOT NULL, - pr_repo_meta_id bigint, - pr_repo_head_or_base character varying, - pr_src_repo_id bigint, - pr_src_node_id character varying, - pr_repo_name character varying, - pr_repo_full_name character varying, - pr_repo_private_bool boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - pr_cntrb_id uuid -); - - -ALTER TABLE augur_data.pull_request_repo OWNER TO augur; - --- --- Name: TABLE pull_request_repo; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.pull_request_repo IS 'This table is for storing information about forks that exist as part of a pull request. Generally we do not want to track these like ordinary repositories. '; - - --- --- Name: COLUMN pull_request_repo.pr_repo_head_or_base; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_repo.pr_repo_head_or_base IS 'For ease of validation checking, we should determine if the repository referenced is the head or base of the pull request. 
Each pull request should have one and only one of these, which is not enforcable easily in the database.'; - - --- --- Name: pull_request_review_message_ref_pr_review_msg_ref_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_review_message_ref_pr_review_msg_ref_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_review_message_ref_pr_review_msg_ref_id_seq OWNER TO augur; - --- --- Name: pull_request_review_message_ref; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_review_message_ref ( - pr_review_msg_ref_id bigint DEFAULT nextval('augur_data.pull_request_review_message_ref_pr_review_msg_ref_id_seq'::regclass) NOT NULL, - pr_review_id bigint NOT NULL, - repo_id bigint, - msg_id bigint NOT NULL, - pr_review_msg_url character varying, - pr_review_src_id bigint, - pr_review_msg_src_id bigint, - pr_review_msg_node_id character varying, - pr_review_msg_diff_hunk character varying, - pr_review_msg_path character varying, - pr_review_msg_position bigint, - pr_review_msg_original_position bigint, - pr_review_msg_commit_id character varying, - pr_review_msg_original_commit_id character varying, - pr_review_msg_updated_at timestamp(6) without time zone, - pr_review_msg_html_url character varying, - pr_url character varying, - pr_review_msg_author_association character varying, - pr_review_msg_start_line bigint, - pr_review_msg_original_start_line bigint, - pr_review_msg_start_side character varying, - pr_review_msg_line bigint, - pr_review_msg_original_line bigint, - pr_review_msg_side character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.pull_request_review_message_ref OWNER TO augur; - --- --- Name: pull_request_reviewers_pr_reviewer_map_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_reviewers_pr_reviewer_map_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_reviewers_pr_reviewer_map_id_seq OWNER TO augur; - --- --- Name: pull_request_reviewers; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_reviewers ( - pr_reviewer_map_id bigint DEFAULT nextval('augur_data.pull_request_reviewers_pr_reviewer_map_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - pr_source_id bigint, - repo_id bigint, - pr_reviewer_src_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.pull_request_reviewers OWNER TO augur; - --- --- Name: COLUMN pull_request_reviewers.pr_source_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_reviewers.pr_source_id IS 'The platform ID for the pull/merge request. Used as part of the natural key, along with pr_reviewer_src_id in this table. '; - - --- --- Name: COLUMN pull_request_reviewers.pr_reviewer_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_reviewers.pr_reviewer_src_id IS 'The platform ID for the pull/merge request reviewer. 
Used as part of the natural key, along with pr_source_id in this table. '; - - --- --- Name: pull_request_reviews_pr_review_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_reviews_pr_review_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_reviews_pr_review_id_seq OWNER TO augur; - --- --- Name: pull_request_reviews; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_reviews ( - pr_review_id bigint DEFAULT nextval('augur_data.pull_request_reviews_pr_review_id_seq'::regclass) NOT NULL, - pull_request_id bigint NOT NULL, - repo_id bigint, - pr_review_author_association character varying, - pr_review_state character varying, - pr_review_body character varying, - pr_review_submitted_at timestamp(6) without time zone, - pr_review_src_id bigint, - pr_review_node_id character varying, - pr_review_html_url character varying, - pr_review_pull_request_url character varying, - pr_review_commit_id character varying, - platform_id bigint DEFAULT 25150, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid NOT NULL -); - - -ALTER TABLE augur_data.pull_request_reviews OWNER TO augur; - --- --- Name: pull_request_teams_pr_team_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_teams_pr_team_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_teams_pr_team_id_seq OWNER TO augur; - --- --- Name: pull_request_teams; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_teams ( - pr_team_id bigint DEFAULT nextval('augur_data.pull_request_teams_pr_team_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - pr_src_team_id bigint, - pr_src_team_node character varying, - pr_src_team_url character varying, - pr_team_name character varying, - pr_team_slug character varying, - pr_team_description character varying, - pr_team_privacy character varying, - pr_team_permission character varying, - pr_team_src_members_url character varying, - pr_team_src_repositories_url character varying, - pr_team_parent_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.pull_request_teams OWNER TO augur; - --- --- Name: pull_requests_pull_request_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_requests_pull_request_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_requests_pull_request_id_seq OWNER TO augur; - --- --- Name: pull_requests; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_requests ( - pull_request_id bigint DEFAULT nextval('augur_data.pull_requests_pull_request_id_seq'::regclass) NOT NULL, - repo_id bigint DEFAULT 0, - pr_url character varying, - pr_src_id bigint, - pr_src_node_id character varying, - pr_html_url character varying, - pr_diff_url character varying, - pr_patch_url character varying, - pr_issue_url character varying, - pr_augur_issue_id bigint, - pr_src_number bigint, - pr_src_state character varying, - pr_src_locked boolean, - 
pr_src_title character varying, - pr_body text, - pr_created_at timestamp(0) without time zone, - pr_updated_at timestamp(0) without time zone, - pr_closed_at timestamp(0) without time zone, - pr_merged_at timestamp(0) without time zone, - pr_merge_commit_sha character varying, - pr_teams bigint, - pr_milestone character varying, - pr_commits_url character varying, - pr_review_comments_url character varying, - pr_review_comment_url character varying, - pr_comments_url character varying, - pr_statuses_url character varying, - pr_meta_head_id character varying, - pr_meta_base_id character varying, - pr_src_issue_url character varying, - pr_src_comments_url character varying, - pr_src_review_comments_url character varying, - pr_src_commits_url character varying, - pr_src_statuses_url character varying, - pr_src_author_association character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - pr_augur_contributor_id uuid -); - - -ALTER TABLE augur_data.pull_requests OWNER TO augur; - --- --- Name: COLUMN pull_requests.pr_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_src_id IS 'The pr_src_id is unique across all of github.'; - - --- --- Name: COLUMN pull_requests.pr_augur_issue_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_augur_issue_id IS 'This is to link to the augur stored related issue'; - - --- --- Name: COLUMN pull_requests.pr_src_number; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_src_number IS 'The pr_src_number is unique within a repository.'; - - --- --- Name: COLUMN pull_requests.pr_teams; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_teams IS 'One to many with pull request teams. '; - - --- --- Name: COLUMN pull_requests.pr_review_comment_url; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_review_comment_url IS 'This is a field with limited utility. It does expose how to access a specific comment if needed with parameters. If the source changes URL structure, it may be useful'; - - --- --- Name: COLUMN pull_requests.pr_meta_head_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_meta_head_id IS 'The metadata for the head repo that links to the pull_request_meta table. '; - - --- --- Name: COLUMN pull_requests.pr_meta_base_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_meta_base_id IS 'The metadata for the base repo that links to the pull_request_meta table. '; - - --- --- Name: COLUMN pull_requests.pr_augur_contributor_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_augur_contributor_id IS 'This is to link to the augur contributor record. 
'; - - --- --- Name: releases_release_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.releases_release_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.releases_release_id_seq OWNER TO augur; - --- --- Name: releases; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.releases ( - release_id character(64) DEFAULT nextval('augur_data.releases_release_id_seq'::regclass) NOT NULL, - repo_id bigint NOT NULL, - release_name character varying, - release_description character varying, - release_author character varying, - release_created_at timestamp(6) without time zone, - release_published_at timestamp(6) without time zone, - release_updated_at timestamp(6) without time zone, - release_is_draft boolean, - release_is_prerelease boolean, - release_tag_name character varying, - release_url character varying, - tag_only boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.releases OWNER TO augur; - --- --- Name: repo_repo_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_repo_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_repo_id_seq OWNER TO augur; - --- --- Name: repo; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo ( - repo_id bigint DEFAULT nextval('augur_data.repo_repo_id_seq'::regclass) NOT NULL, - repo_group_id bigint NOT NULL, - repo_git character varying NOT NULL, - repo_path character varying DEFAULT 'NULL'::character varying, - repo_name character varying DEFAULT 'NULL'::character varying, - repo_added timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - repo_status character varying DEFAULT 'New'::character varying NOT NULL, - repo_type character varying DEFAULT ''::character varying, - url character varying, - owner_id integer, - description character varying, - primary_language character varying, - created_at character varying, - forked_from character varying, - updated_at timestamp(0) without time zone, - repo_archived_date_collected timestamp(0) with time zone, - repo_archived integer, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo OWNER TO augur; - --- --- Name: TABLE repo; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo IS 'This table is a combination of the columns in Facade’s repo table and GHTorrent’s projects table. '; - - --- --- Name: COLUMN repo.repo_type; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo.repo_type IS 'This field is intended to indicate if the repository is the "main instance" of a repository in cases where implementations choose to add the same repository to more than one repository group. In cases where the repository group is of rg_type Github Organization then this repo_type should be "primary". In other cases the repo_type should probably be "user created". We made this a varchar in order to hold open the possibility that there are additional repo_types we have not thought about. 
'; - - --- --- Name: repo_badging_badge_collection_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_badging_badge_collection_id_seq - START WITH 25012 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_badging_badge_collection_id_seq OWNER TO augur; - --- --- Name: repo_badging; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_badging ( - badge_collection_id bigint DEFAULT nextval('augur_data.repo_badging_badge_collection_id_seq'::regclass) NOT NULL, - repo_id bigint, - created_at timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - data jsonb -); - - -ALTER TABLE augur_data.repo_badging OWNER TO augur; - --- --- Name: TABLE repo_badging; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_badging IS 'This will be collected from the LF’s Badging API -https://bestpractices.coreinfrastructure.org/projects.json?pq=https%3A%2F%2Fgithub.com%2Fchaoss%2Faugur -'; - - --- --- Name: repo_cluster_messages_msg_cluster_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_cluster_messages_msg_cluster_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_cluster_messages_msg_cluster_id_seq OWNER TO augur; - --- --- Name: repo_cluster_messages; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_cluster_messages ( - msg_cluster_id bigint DEFAULT nextval('augur_data.repo_cluster_messages_msg_cluster_id_seq'::regclass) NOT NULL, - repo_id bigint, - cluster_content integer, - cluster_mechanism integer, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_cluster_messages OWNER TO augur; - --- --- Name: repo_dependencies_repo_dependencies_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_dependencies_repo_dependencies_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_dependencies_repo_dependencies_id_seq OWNER TO augur; - --- --- Name: repo_dependencies; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_dependencies ( - repo_dependencies_id bigint DEFAULT nextval('augur_data.repo_dependencies_repo_dependencies_id_seq'::regclass) NOT NULL, - repo_id bigint, - dep_name character varying, - dep_count integer, - dep_language character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_dependencies OWNER TO augur; - --- --- Name: TABLE repo_dependencies; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_dependencies IS 'Contains the dependencies for a repo.'; - - --- --- Name: COLUMN repo_dependencies.repo_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_dependencies.repo_id IS 'Forign key for repo id. 
'; - - --- --- Name: COLUMN repo_dependencies.dep_name; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_dependencies.dep_name IS 'Name of the dependancy found in project. '; - - --- --- Name: COLUMN repo_dependencies.dep_count; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_dependencies.dep_count IS 'Number of times the dependancy was found. '; - - --- --- Name: COLUMN repo_dependencies.dep_language; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_dependencies.dep_language IS 'Language of the dependancy. '; - - --- --- Name: repo_deps_libyear_repo_deps_libyear_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_deps_libyear_repo_deps_libyear_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_deps_libyear_repo_deps_libyear_id_seq OWNER TO augur; - --- --- Name: repo_deps_libyear; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_deps_libyear ( - repo_deps_libyear_id bigint DEFAULT nextval('augur_data.repo_deps_libyear_repo_deps_libyear_id_seq'::regclass) NOT NULL, - repo_id bigint, - name character varying, - requirement character varying, - type character varying, - package_manager character varying, - current_verion character varying, - latest_version character varying, - current_release_date character varying, - latest_release_date character varying, - libyear double precision, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_deps_libyear OWNER TO augur; - --- --- Name: repo_deps_scorecard_repo_deps_scorecard_id_seq1; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_deps_scorecard_repo_deps_scorecard_id_seq1 - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_deps_scorecard_repo_deps_scorecard_id_seq1 OWNER TO augur; - --- --- Name: repo_deps_scorecard; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_deps_scorecard ( - repo_deps_scorecard_id bigint DEFAULT nextval('augur_data.repo_deps_scorecard_repo_deps_scorecard_id_seq1'::regclass) NOT NULL, - repo_id bigint, - name character varying, - status character varying, - score character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_deps_scorecard OWNER TO augur; - --- --- Name: repo_group_insights_rgi_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_group_insights_rgi_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_group_insights_rgi_id_seq OWNER TO augur; - --- --- Name: repo_group_insights; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_group_insights ( - rgi_id bigint DEFAULT nextval('augur_data.repo_group_insights_rgi_id_seq'::regclass) NOT NULL, - repo_group_id bigint, - rgi_metric character varying, - rgi_value character varying, - cms_id bigint, - rgi_fresh boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - 
data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_group_insights OWNER TO augur; - --- --- Name: TABLE repo_group_insights; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_group_insights IS 'This table is output from an analytical worker inside of Augur. It runs through the different metrics on a REPOSITORY_GROUP and identifies the five to ten most “interesting” metrics as defined by some kind of delta or other factor. The algorithm is going to evolve. - -Worker Design Notes: The idea is that the "insight worker" will scan through a bunch of active metrics or "synthetic metrics" to list the most important insights. '; - - --- --- Name: COLUMN repo_group_insights.rgi_fresh; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_group_insights.rgi_fresh IS 'false if the date is before the statistic that triggered the insight, true if after. This allows us to automatically display only "fresh insights" and avoid displaying "stale insights". The insight worker will populate this table. '; - - --- --- Name: repo_groups_repo_group_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_groups_repo_group_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_groups_repo_group_id_seq OWNER TO augur; - --- --- Name: repo_groups; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_groups ( - repo_group_id bigint DEFAULT nextval('augur_data.repo_groups_repo_group_id_seq'::regclass) NOT NULL, - rg_name character varying NOT NULL, - rg_description character varying DEFAULT 'NULL'::character varying, - rg_website character varying(128) DEFAULT 'NULL'::character varying, - rg_recache smallint DEFAULT 1, - rg_last_modified timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - rg_type character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.repo_groups OWNER TO augur; - --- --- Name: TABLE repo_groups; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_groups IS 'rg_type is intended to be either a GitHub Organization or a User Created Repo Group. 
'; - - --- --- Name: repo_groups_list_serve_rgls_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_groups_list_serve_rgls_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_groups_list_serve_rgls_id_seq OWNER TO augur; - --- --- Name: repo_groups_list_serve; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_groups_list_serve ( - rgls_id bigint DEFAULT nextval('augur_data.repo_groups_list_serve_rgls_id_seq'::regclass) NOT NULL, - repo_group_id bigint NOT NULL, - rgls_name character varying, - rgls_description character varying(3000), - rgls_sponsor character varying, - rgls_email character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.repo_groups_list_serve OWNER TO augur; - --- --- Name: repo_info_repo_info_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_info_repo_info_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_info_repo_info_id_seq OWNER TO augur; - --- --- Name: repo_info; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_info ( - repo_info_id bigint DEFAULT nextval('augur_data.repo_info_repo_info_id_seq'::regclass) NOT NULL, - repo_id bigint NOT NULL, - last_updated timestamp(0) without time zone DEFAULT NULL::timestamp without time zone, - issues_enabled character varying, - open_issues integer, - pull_requests_enabled character varying, - wiki_enabled character varying, - pages_enabled character varying, - fork_count integer, - default_branch character varying, - watchers_count integer, - "UUID" integer, - license character varying, - stars_count integer, - committers_count integer, - issue_contributors_count character varying, - changelog_file character varying, - contributing_file character varying, - license_file character varying, - code_of_conduct_file character varying, - security_issue_file character varying, - security_audit_file character varying, - status character varying, - keywords character varying, - commit_count bigint, - issues_count bigint, - issues_closed bigint, - pull_request_count bigint, - pull_requests_open bigint, - pull_requests_closed bigint, - pull_requests_merged bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_info OWNER TO augur; - --- --- Name: repo_insights_ri_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_insights_ri_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_insights_ri_id_seq OWNER TO augur; - --- --- Name: repo_insights; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_insights ( - ri_id bigint DEFAULT nextval('augur_data.repo_insights_ri_id_seq'::regclass) NOT NULL, - repo_id bigint, - ri_metric character varying, - ri_value character varying, - ri_date timestamp(0) without time zone, - ri_fresh boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - ri_score 
numeric, - ri_field character varying, - ri_detection_method character varying -); - - -ALTER TABLE augur_data.repo_insights OWNER TO augur; - --- --- Name: TABLE repo_insights; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_insights IS 'This table is output from an analytical worker inside of Augur. It runs through the different metrics on a repository and identifies the five to ten most “interesting” metrics as defined by some kind of delta or other factor. The algorithm is going to evolve. - -Worker Design Notes: The idea is that the "insight worker" will scan through a bunch of active metrics or "synthetic metrics" to list the most important insights. '; - - --- --- Name: COLUMN repo_insights.ri_fresh; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights.ri_fresh IS 'false if the date is before the statistic that triggered the insight, true if after. This allows us to automatically display only "fresh insights" and avoid displaying "stale insights". The insight worker will populate this table. '; - - --- --- Name: repo_insights_records_ri_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_insights_records_ri_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_insights_records_ri_id_seq OWNER TO augur; - --- --- Name: repo_insights_records; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_insights_records ( - ri_id bigint DEFAULT nextval('augur_data.repo_insights_records_ri_id_seq'::regclass) NOT NULL, - repo_id bigint, - ri_metric character varying, - ri_field character varying, - ri_value character varying, - ri_date timestamp(6) without time zone, - ri_score double precision, - ri_detection_method character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_insights_records OWNER TO augur; - --- --- Name: COLUMN repo_insights_records.ri_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_id IS 'Primary key. '; - - --- --- Name: COLUMN repo_insights_records.repo_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.repo_id IS 'Refers to repo table primary key. Will have a foreign key'; - - --- --- Name: COLUMN repo_insights_records.ri_metric; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_metric IS 'The metric endpoint'; - - --- --- Name: COLUMN repo_insights_records.ri_field; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_field IS 'The field in the metric endpoint'; - - --- --- Name: COLUMN repo_insights_records.ri_value; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_value IS 'The value of the endpoint in ri_field'; - - --- --- Name: COLUMN repo_insights_records.ri_date; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_date IS 'The date the insight is for; in other words, some anomaly occurred on this date. 
'; - - --- --- Name: COLUMN repo_insights_records.ri_score; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_score IS 'A Score, derived from the algorithm used. '; - - --- --- Name: COLUMN repo_insights_records.ri_detection_method; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_detection_method IS 'A confidence interval or other expression of the type of threshold and the value of a threshold met in order for it to be "an insight". Example. "95% confidence interval". '; - - --- --- Name: COLUMN repo_insights_records.tool_source; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.tool_source IS 'Standard Augur Metadata'; - - --- --- Name: COLUMN repo_insights_records.tool_version; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.tool_version IS 'Standard Augur Metadata'; - - --- --- Name: COLUMN repo_insights_records.data_source; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.data_source IS 'Standard Augur Metadata'; - - --- --- Name: COLUMN repo_insights_records.data_collection_date; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.data_collection_date IS 'Standard Augur Metadata'; - - --- --- Name: repo_labor_repo_labor_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_labor_repo_labor_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_labor_repo_labor_id_seq OWNER TO augur; - --- --- Name: repo_labor; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_labor ( - repo_labor_id bigint DEFAULT nextval('augur_data.repo_labor_repo_labor_id_seq'::regclass) NOT NULL, - repo_id bigint, - repo_clone_date timestamp(0) without time zone, - rl_analysis_date timestamp(0) without time zone, - programming_language character varying, - file_path character varying, - file_name character varying, - total_lines integer, - code_lines integer, - comment_lines integer, - blank_lines integer, - code_complexity integer, - repo_url character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.repo_labor OWNER TO augur; - --- --- Name: TABLE repo_labor; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_labor IS 'repo_labor is a derivative of tables used to store scc code and complexity counting statistics that are inputs to labor analysis, which are components of CHAOSS value metric calculations. 
'; - - --- --- Name: COLUMN repo_labor.repo_url; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_labor.repo_url IS 'This is a convenience column to simplify analysis against external datasets'; - - --- --- Name: repo_meta_rmeta_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_meta_rmeta_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_meta_rmeta_id_seq OWNER TO augur; - --- --- Name: repo_meta; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_meta ( - repo_id bigint NOT NULL, - rmeta_id bigint DEFAULT nextval('augur_data.repo_meta_rmeta_id_seq'::regclass) NOT NULL, - rmeta_name character varying, - rmeta_value character varying DEFAULT 0, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.repo_meta OWNER TO augur; - --- --- Name: TABLE repo_meta; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_meta IS 'Project Languages'; - - --- --- Name: repo_sbom_scans_rsb_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_sbom_scans_rsb_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_sbom_scans_rsb_id_seq OWNER TO augur; - --- --- Name: repo_sbom_scans; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_sbom_scans ( - rsb_id bigint DEFAULT nextval('augur_data.repo_sbom_scans_rsb_id_seq'::regclass) NOT NULL, - repo_id bigint, - sbom_scan json -); - - -ALTER TABLE augur_data.repo_sbom_scans OWNER TO augur; - --- --- Name: repo_stats_rstat_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_stats_rstat_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_stats_rstat_id_seq OWNER TO augur; - --- --- Name: repo_stats; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_stats ( - repo_id bigint NOT NULL, - rstat_id bigint DEFAULT nextval('augur_data.repo_stats_rstat_id_seq'::regclass) NOT NULL, - rstat_name character varying(400), - rstat_value bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.repo_stats OWNER TO augur; - --- --- Name: TABLE repo_stats; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_stats IS 'Project Watchers'; - - --- --- Name: repo_test_coverage_repo_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_test_coverage_repo_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_test_coverage_repo_id_seq OWNER TO augur; - --- --- Name: repo_test_coverage; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_test_coverage ( - repo_id bigint DEFAULT nextval('augur_data.repo_test_coverage_repo_id_seq'::regclass) NOT NULL, - repo_clone_date timestamp(0) without time zone, - rtc_analysis_date timestamp(0) without time zone, - programming_language character varying, - file_path character varying, - file_name character varying, - testing_tool character varying, - 
file_statement_count bigint, - file_subroutine_count bigint, - file_statements_tested bigint, - file_subroutines_tested bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_test_coverage OWNER TO augur; - --- --- Name: repo_topic_repo_topic_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_topic_repo_topic_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_topic_repo_topic_id_seq OWNER TO augur; - --- --- Name: repo_topic; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_topic ( - repo_topic_id bigint DEFAULT nextval('augur_data.repo_topic_repo_topic_id_seq'::regclass) NOT NULL, - repo_id bigint, - topic_id integer, - topic_prob double precision, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_topic OWNER TO augur; - --- --- Name: repos_fetch_log; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repos_fetch_log ( - repos_id integer NOT NULL, - status character varying(128) NOT NULL, - date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_data.repos_fetch_log OWNER TO augur; - --- --- Name: settings; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.settings ( - id integer NOT NULL, - setting character varying(32) NOT NULL, - value character varying NOT NULL, - last_modified timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_data.settings OWNER TO augur; - --- --- Name: topic_words_topic_words_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.topic_words_topic_words_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.topic_words_topic_words_id_seq OWNER TO augur; - --- --- Name: topic_words; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.topic_words ( - topic_words_id bigint DEFAULT nextval('augur_data.topic_words_topic_words_id_seq'::regclass) NOT NULL, - topic_id bigint, - word character varying, - word_prob double precision, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.topic_words OWNER TO augur; - --- --- Name: unknown_cache; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.unknown_cache ( - type character varying(10) NOT NULL, - repo_group_id integer NOT NULL, - email character varying(128) NOT NULL, - domain character varying(128) DEFAULT 'NULL'::character varying, - added bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.unknown_cache OWNER TO augur; - --- --- Name: unresolved_commit_emails_email_unresolved_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.unresolved_commit_emails_email_unresolved_id_seq - START WITH 1 - INCREMENT BY 1 - NO 
MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.unresolved_commit_emails_email_unresolved_id_seq OWNER TO augur; - --- --- Name: unresolved_commit_emails; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.unresolved_commit_emails ( - email_unresolved_id bigint DEFAULT nextval('augur_data.unresolved_commit_emails_email_unresolved_id_seq'::regclass) NOT NULL, - email character varying NOT NULL, - name character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.unresolved_commit_emails OWNER TO augur; - --- --- Name: utility_log_id_seq1; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.utility_log_id_seq1 - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.utility_log_id_seq1 OWNER TO augur; - --- --- Name: utility_log; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.utility_log ( - id bigint DEFAULT nextval('augur_data.utility_log_id_seq1'::regclass) NOT NULL, - level character varying(8) NOT NULL, - status character varying NOT NULL, - attempted timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_data.utility_log OWNER TO augur; - --- --- Name: utility_log_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.utility_log_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.utility_log_id_seq OWNER TO augur; - --- --- Name: working_commits; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.working_commits ( - repos_id integer NOT NULL, - working_commit character varying(40) DEFAULT 'NULL'::character varying -); - - -ALTER TABLE augur_data.working_commits OWNER TO augur; - --- --- Name: affiliations_corp_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.affiliations_corp_id_seq - START WITH 620000 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.affiliations_corp_id_seq OWNER TO augur; - --- --- Name: all; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations."all" ( - "Name" character varying, - "Bytes" character varying, - "Lines" character varying, - "Code" character varying, - "Comment" character varying, - "Blank" character varying, - "Complexity" character varying, - "Count" character varying, - "WeightedComplexity" character varying, - "Files" character varying -); - - -ALTER TABLE augur_operations."all" OWNER TO augur; - --- --- Name: augur_settings_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.augur_settings_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.augur_settings_id_seq OWNER TO augur; - --- --- Name: augur_settings; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.augur_settings ( - id bigint DEFAULT nextval('augur_operations.augur_settings_id_seq'::regclass) NOT NULL, - setting character varying, - value character varying, - last_modified timestamp(0) without time zone DEFAULT CURRENT_DATE -); - - -ALTER TABLE augur_operations.augur_settings OWNER TO augur; - --- --- Name: TABLE augur_settings; Type: COMMENT; Schema: 
augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.augur_settings IS 'Augur settings include the schema version, and the Augur API Key as of 10/25/2020. Future augur settings may be stored in this table, which has the basic structure of a name-value pair. '; - - --- --- Name: config; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.config ( - id smallint NOT NULL, - section_name character varying NOT NULL, - setting_name character varying NOT NULL, - value character varying, - type character varying -); - - -ALTER TABLE augur_operations.config OWNER TO augur; - --- --- Name: config_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.config_id_seq - AS smallint - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.config_id_seq OWNER TO augur; - --- --- Name: config_id_seq; Type: SEQUENCE OWNED BY; Schema: augur_operations; Owner: augur --- - -ALTER SEQUENCE augur_operations.config_id_seq OWNED BY augur_operations.config.id; - - --- --- Name: gh_worker_history_history_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.gh_worker_history_history_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.gh_worker_history_history_id_seq OWNER TO augur; - --- --- Name: repos_fetch_log; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.repos_fetch_log ( - repos_id integer NOT NULL, - status character varying(128) NOT NULL, - date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_operations.repos_fetch_log OWNER TO augur; - --- --- Name: TABLE repos_fetch_log; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.repos_fetch_log IS 'For future use when we move all working tables to the augur_operations schema. 
'; - - --- --- Name: users; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.users ( - user_id integer NOT NULL, - login_name character varying NOT NULL, - login_hashword character varying NOT NULL, - email character varying NOT NULL, - text_phone character varying, - first_name character varying NOT NULL, - last_name character varying NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - admin boolean NOT NULL -); - - -ALTER TABLE augur_operations.users OWNER TO augur; - --- --- Name: users_user_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.users_user_id_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.users_user_id_seq OWNER TO augur; - --- --- Name: users_user_id_seq; Type: SEQUENCE OWNED BY; Schema: augur_operations; Owner: augur --- - -ALTER SEQUENCE augur_operations.users_user_id_seq OWNED BY augur_operations.users.user_id; - - --- --- Name: worker_history; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.worker_history ( - history_id bigint DEFAULT nextval('augur_operations.gh_worker_history_history_id_seq'::regclass) NOT NULL, - repo_id bigint, - worker character varying(255) NOT NULL, - job_model character varying(255) NOT NULL, - oauth_id integer, - "timestamp" timestamp(0) without time zone NOT NULL, - status character varying(7) NOT NULL, - total_results integer -); - - -ALTER TABLE augur_operations.worker_history OWNER TO augur; - --- --- Name: TABLE worker_history; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.worker_history IS 'This table stores the complete history of job execution, including success and failure. It is useful for troubleshooting. '; - - --- --- Name: worker_job; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.worker_job ( - job_model character varying(255) NOT NULL, - state integer DEFAULT 0 NOT NULL, - zombie_head integer, - since_id_str character varying(255) DEFAULT '0'::character varying NOT NULL, - description character varying(255) DEFAULT 'None'::character varying, - last_count integer, - last_run timestamp(0) without time zone DEFAULT NULL::timestamp without time zone, - analysis_state integer DEFAULT 0, - oauth_id integer NOT NULL -); - - -ALTER TABLE augur_operations.worker_job OWNER TO augur; - --- --- Name: TABLE worker_job; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.worker_job IS 'This table stores the jobs workers collect data for. A job is found in the code, and in the augur.config.json under the construct of a “model”. 
'; - - --- --- Name: worker_oauth_oauth_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.worker_oauth_oauth_id_seq - START WITH 1000 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.worker_oauth_oauth_id_seq OWNER TO augur; - --- --- Name: worker_oauth; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.worker_oauth ( - oauth_id bigint DEFAULT nextval('augur_operations.worker_oauth_oauth_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL, - consumer_key character varying(255) NOT NULL, - consumer_secret character varying(255) NOT NULL, - access_token character varying(255) NOT NULL, - access_token_secret character varying(255) NOT NULL, - repo_directory character varying, - platform character varying DEFAULT 'github'::character varying -); - - -ALTER TABLE augur_operations.worker_oauth OWNER TO augur; - --- --- Name: TABLE worker_oauth; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.worker_oauth IS 'This table stores credentials for retrieving data from platform API’s. Entries in this table must comply with the terms of service for each platform. '; - - --- --- Name: worker_settings_facade; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.worker_settings_facade ( - id integer NOT NULL, - setting character varying(32) NOT NULL, - value character varying NOT NULL, - last_modified timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_operations.worker_settings_facade OWNER TO augur; - --- --- Name: TABLE worker_settings_facade; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.worker_settings_facade IS 'For future use when we move all working tables to the augur_operations schema. '; - - --- --- Name: working_commits; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.working_commits ( - repos_id integer NOT NULL, - working_commit character varying(40) DEFAULT 'NULL'::character varying -); - - -ALTER TABLE augur_operations.working_commits OWNER TO augur; - --- --- Name: TABLE working_commits; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.working_commits IS 'For future use when we move all working tables to the augur_operations schema. 
'; - - --- --- Name: alembic_version; Type: TABLE; Schema: public; Owner: augur --- - -CREATE TABLE public.alembic_version ( - version_num character varying(32) NOT NULL -); - - -ALTER TABLE public.alembic_version OWNER TO augur; - --- --- Name: annotation_types_annotation_type_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.annotation_types_annotation_type_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.annotation_types_annotation_type_id_seq OWNER TO augur; - --- --- Name: annotation_types; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.annotation_types ( - annotation_type_id integer DEFAULT nextval('spdx.annotation_types_annotation_type_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL -); - - -ALTER TABLE spdx.annotation_types OWNER TO augur; - --- --- Name: annotations_annotation_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.annotations_annotation_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.annotations_annotation_id_seq OWNER TO augur; - --- --- Name: annotations; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.annotations ( - annotation_id integer DEFAULT nextval('spdx.annotations_annotation_id_seq'::regclass) NOT NULL, - document_id integer NOT NULL, - annotation_type_id integer NOT NULL, - identifier_id integer NOT NULL, - creator_id integer NOT NULL, - created_ts timestamp(6) with time zone, - comment text NOT NULL -); - - -ALTER TABLE spdx.annotations OWNER TO augur; - --- --- Name: augur_repo_map_map_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.augur_repo_map_map_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.augur_repo_map_map_id_seq OWNER TO augur; - --- --- Name: augur_repo_map; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.augur_repo_map ( - map_id integer DEFAULT nextval('spdx.augur_repo_map_map_id_seq'::regclass) NOT NULL, - dosocs_pkg_id integer, - dosocs_pkg_name text, - repo_id integer, - repo_path text -); - - -ALTER TABLE spdx.augur_repo_map OWNER TO augur; - --- --- Name: creator_types_creator_type_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.creator_types_creator_type_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.creator_types_creator_type_id_seq OWNER TO augur; - --- --- Name: creator_types; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.creator_types ( - creator_type_id integer DEFAULT nextval('spdx.creator_types_creator_type_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL -); - - -ALTER TABLE spdx.creator_types OWNER TO augur; - --- --- Name: creators_creator_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.creators_creator_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.creators_creator_id_seq OWNER TO augur; - --- --- Name: creators; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.creators ( - creator_id integer DEFAULT nextval('spdx.creators_creator_id_seq'::regclass) NOT NULL, - creator_type_id integer NOT NULL, - name character varying(255) NOT NULL, - email character varying(255) NOT NULL -); - - -ALTER TABLE spdx.creators OWNER TO augur; - --- --- Name: 
document_namespaces_document_namespace_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.document_namespaces_document_namespace_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.document_namespaces_document_namespace_id_seq OWNER TO augur; - --- --- Name: document_namespaces; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.document_namespaces ( - document_namespace_id integer DEFAULT nextval('spdx.document_namespaces_document_namespace_id_seq'::regclass) NOT NULL, - uri character varying(500) NOT NULL -); - - -ALTER TABLE spdx.document_namespaces OWNER TO augur; - --- --- Name: documents_document_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.documents_document_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.documents_document_id_seq OWNER TO augur; - --- --- Name: documents; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.documents ( - document_id integer DEFAULT nextval('spdx.documents_document_id_seq'::regclass) NOT NULL, - document_namespace_id integer NOT NULL, - data_license_id integer NOT NULL, - spdx_version character varying(255) NOT NULL, - name character varying(255) NOT NULL, - license_list_version character varying(255) NOT NULL, - created_ts timestamp(6) with time zone NOT NULL, - creator_comment text NOT NULL, - document_comment text NOT NULL, - package_id integer NOT NULL -); - - -ALTER TABLE spdx.documents OWNER TO augur; - --- --- Name: documents_creators_document_creator_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.documents_creators_document_creator_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.documents_creators_document_creator_id_seq OWNER TO augur; - --- --- Name: documents_creators; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.documents_creators ( - document_creator_id integer DEFAULT nextval('spdx.documents_creators_document_creator_id_seq'::regclass) NOT NULL, - document_id integer NOT NULL, - creator_id integer NOT NULL -); - - -ALTER TABLE spdx.documents_creators OWNER TO augur; - --- --- Name: external_refs_external_ref_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.external_refs_external_ref_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.external_refs_external_ref_id_seq OWNER TO augur; - --- --- Name: external_refs; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.external_refs ( - external_ref_id integer DEFAULT nextval('spdx.external_refs_external_ref_id_seq'::regclass) NOT NULL, - document_id integer NOT NULL, - document_namespace_id integer NOT NULL, - id_string character varying(255) NOT NULL, - sha256 character varying(64) NOT NULL -); - - -ALTER TABLE spdx.external_refs OWNER TO augur; - --- --- Name: file_contributors_file_contributor_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.file_contributors_file_contributor_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.file_contributors_file_contributor_id_seq OWNER TO augur; - --- --- Name: file_contributors; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.file_contributors ( - file_contributor_id integer DEFAULT 
nextval('spdx.file_contributors_file_contributor_id_seq'::regclass) NOT NULL, - file_id integer NOT NULL, - contributor text NOT NULL -); - - -ALTER TABLE spdx.file_contributors OWNER TO augur; - --- --- Name: file_types; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.file_types ( - file_type_id integer, - name character varying(255) NOT NULL -); - - -ALTER TABLE spdx.file_types OWNER TO augur; - --- --- Name: file_types_file_type_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.file_types_file_type_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.file_types_file_type_id_seq OWNER TO augur; - --- --- Name: files_file_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.files_file_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.files_file_id_seq OWNER TO augur; - --- --- Name: files; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.files ( - file_id integer DEFAULT nextval('spdx.files_file_id_seq'::regclass) NOT NULL, - file_type_id integer, - sha256 character varying(64) NOT NULL, - copyright_text text, - package_id integer, - comment text NOT NULL, - notice text NOT NULL -); - - -ALTER TABLE spdx.files OWNER TO augur; - --- --- Name: files_licenses_file_license_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.files_licenses_file_license_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.files_licenses_file_license_id_seq OWNER TO augur; - --- --- Name: files_licenses; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.files_licenses ( - file_license_id integer DEFAULT nextval('spdx.files_licenses_file_license_id_seq'::regclass) NOT NULL, - file_id integer NOT NULL, - license_id integer NOT NULL, - extracted_text text NOT NULL -); - - -ALTER TABLE spdx.files_licenses OWNER TO augur; - --- --- Name: files_scans_file_scan_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.files_scans_file_scan_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.files_scans_file_scan_id_seq OWNER TO augur; - --- --- Name: files_scans; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.files_scans ( - file_scan_id integer DEFAULT nextval('spdx.files_scans_file_scan_id_seq'::regclass) NOT NULL, - file_id integer NOT NULL, - scanner_id integer NOT NULL -); - - -ALTER TABLE spdx.files_scans OWNER TO augur; - --- --- Name: identifiers_identifier_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.identifiers_identifier_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.identifiers_identifier_id_seq OWNER TO augur; - --- --- Name: identifiers; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.identifiers ( - identifier_id integer DEFAULT nextval('spdx.identifiers_identifier_id_seq'::regclass) NOT NULL, - document_namespace_id integer NOT NULL, - id_string character varying(255) NOT NULL, - document_id integer, - package_id integer, - package_file_id integer, - CONSTRAINT ck_identifier_exactly_one CHECK ((((((document_id IS NOT NULL))::integer + ((package_id IS NOT NULL))::integer) + ((package_file_id IS NOT NULL))::integer) = 1)) -); - - -ALTER TABLE spdx.identifiers OWNER TO augur; - --- --- Name: 
licenses_license_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.licenses_license_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.licenses_license_id_seq OWNER TO augur; - --- --- Name: licenses; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.licenses ( - license_id integer DEFAULT nextval('spdx.licenses_license_id_seq'::regclass) NOT NULL, - name character varying(255), - short_name character varying(255) NOT NULL, - cross_reference text NOT NULL, - comment text NOT NULL, - is_spdx_official boolean NOT NULL -); - - -ALTER TABLE spdx.licenses OWNER TO augur; - --- --- Name: packages_package_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.packages_package_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.packages_package_id_seq OWNER TO augur; - --- --- Name: packages; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.packages ( - package_id integer DEFAULT nextval('spdx.packages_package_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL, - version character varying(255) NOT NULL, - file_name text NOT NULL, - supplier_id integer, - originator_id integer, - download_location text, - verification_code character varying(64) NOT NULL, - ver_code_excluded_file_id integer, - sha256 character varying(64), - home_page text, - source_info text NOT NULL, - concluded_license_id integer, - declared_license_id integer, - license_comment text NOT NULL, - copyright_text text, - summary text NOT NULL, - description text NOT NULL, - comment text NOT NULL, - dosocs2_dir_code character varying(64), - CONSTRAINT uc_sha256_ds2_dir_code_exactly_one CHECK (((((sha256 IS NOT NULL))::integer + ((dosocs2_dir_code IS NOT NULL))::integer) = 1)) -); - - -ALTER TABLE spdx.packages OWNER TO augur; - --- --- Name: packages_files_package_file_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.packages_files_package_file_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.packages_files_package_file_id_seq OWNER TO augur; - --- --- Name: packages_files; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.packages_files ( - package_file_id integer DEFAULT nextval('spdx.packages_files_package_file_id_seq'::regclass) NOT NULL, - package_id integer NOT NULL, - file_id integer NOT NULL, - concluded_license_id integer, - license_comment text NOT NULL, - file_name text NOT NULL -); - - -ALTER TABLE spdx.packages_files OWNER TO augur; - --- --- Name: packages_scans_package_scan_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.packages_scans_package_scan_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.packages_scans_package_scan_id_seq OWNER TO augur; - --- --- Name: packages_scans; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.packages_scans ( - package_scan_id integer DEFAULT nextval('spdx.packages_scans_package_scan_id_seq'::regclass) NOT NULL, - package_id integer NOT NULL, - scanner_id integer NOT NULL -); - - -ALTER TABLE spdx.packages_scans OWNER TO augur; - --- --- Name: projects_package_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.projects_package_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE 
spdx.projects_package_id_seq OWNER TO augur; - --- --- Name: projects; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.projects ( - package_id integer DEFAULT nextval('spdx.projects_package_id_seq'::regclass) NOT NULL, - name text NOT NULL, - homepage text NOT NULL, - uri text NOT NULL -); - - -ALTER TABLE spdx.projects OWNER TO augur; - --- --- Name: relationship_types_relationship_type_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.relationship_types_relationship_type_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.relationship_types_relationship_type_id_seq OWNER TO augur; - --- --- Name: relationship_types; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.relationship_types ( - relationship_type_id integer DEFAULT nextval('spdx.relationship_types_relationship_type_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL -); - - -ALTER TABLE spdx.relationship_types OWNER TO augur; - --- --- Name: relationships_relationship_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.relationships_relationship_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.relationships_relationship_id_seq OWNER TO augur; - --- --- Name: relationships; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.relationships ( - relationship_id integer DEFAULT nextval('spdx.relationships_relationship_id_seq'::regclass) NOT NULL, - left_identifier_id integer NOT NULL, - right_identifier_id integer NOT NULL, - relationship_type_id integer NOT NULL, - relationship_comment text NOT NULL -); - - -ALTER TABLE spdx.relationships OWNER TO augur; - --- --- Name: sbom_scans; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.sbom_scans ( - repo_id integer, - sbom_scan json -); - - -ALTER TABLE spdx.sbom_scans OWNER TO augur; - --- --- Name: scanners_scanner_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.scanners_scanner_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.scanners_scanner_id_seq OWNER TO augur; - --- --- Name: scanners; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.scanners ( - scanner_id integer DEFAULT nextval('spdx.scanners_scanner_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL -); - - -ALTER TABLE spdx.scanners OWNER TO augur; - --- --- Name: chaoss_user chaoss_id; Type: DEFAULT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.chaoss_user ALTER COLUMN chaoss_id SET DEFAULT nextval('augur_data.chaoss_user_chaoss_id_seq'::regclass); - - --- --- Name: config id; Type: DEFAULT; Schema: augur_operations; Owner: augur --- - -ALTER TABLE ONLY augur_operations.config ALTER COLUMN id SET DEFAULT nextval('augur_operations.config_id_seq'::regclass); - - --- --- Name: users user_id; Type: DEFAULT; Schema: augur_operations; Owner: augur --- - -ALTER TABLE ONLY augur_operations.users ALTER COLUMN user_id SET DEFAULT nextval('augur_operations.users_user_id_seq'::regclass); - - --- --- Data for Name: analysis_log; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.analysis_log (repos_id, status, date_attempted) FROM stdin; -\. 
- - --- --- Data for Name: chaoss_metric_status; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.chaoss_metric_status (cms_id, cm_group, cm_source, cm_type, cm_backend_status, cm_frontend_status, cm_defined, cm_api_endpoint_repo, cm_api_endpoint_rg, cm_name, cm_working_group, cm_info, tool_source, tool_version, data_source, data_collection_date, cm_working_group_focus_area) FROM stdin; -2 growth-maturity-decline githubapi timeseries implemented unimplemented t /api/unstable///timeseries/githubapi/issues \N Open Issues growth-maturity-decline "open-issues" Insight Worker 0.0.1 githubapi 2019-06-20 22:41:41 \N -3 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/issues \N Open Issues growth-maturity-decline "open-issues" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:42:15 \N -4 growth-maturity-decline githubapi timeseries implemented unimplemented t /api/unstable///timeseries/githubapi/issues/closed \N Closed Issues growth-maturity-decline "closed-issues" Insight Worker 0.0.1 githubapi 2019-06-20 22:45:53 \N -5 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/issues/closed \N Closed Issues growth-maturity-decline "closed-issues" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:26 \N -6 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/issues/response_time \N First Response To Issue Duration growth-maturity-decline "first-response-to-issue-duration" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:27 \N -7 growth-maturity-decline githubapi timeseries implemented unimplemented t /api/unstable///timeseries/githubapi/commits \N Code Commits growth-maturity-decline "code-commits" Insight Worker 0.0.1 githubapi 2019-06-20 22:49:29 \N -8 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/commits \N Code Commits growth-maturity-decline "code-commits" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:30 \N -9 growth-maturity-decline githubapi metric implemented unimplemented t /api/unstable///lines_changed \N Lines Of Code Changed growth-maturity-decline "lines-of-code-changed" Insight Worker 0.0.1 githubapi 2019-06-20 22:49:32 \N -10 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/pulls/maintainer_response_time \N Maintainer Response To Merge Request Duration growth-maturity-decline "maintainer-response-to-merge-request-duration" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:33 \N -11 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/code_review_iteration \N Code Review Iteration growth-maturity-decline "code-review-iteration" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:35 \N -12 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/forks \N Forks growth-maturity-decline "forks" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:36 \N -13 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/pulls \N Pull Requests Open growth-maturity-decline "pull-requests-open" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:38 \N -14 growth-maturity-decline ghtorrent timeseries implemented unimplemented f /api/unstable///timeseries/pulls/closed \N Pull Requests Closed growth-maturity-decline "pull-requests-closed" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:39 \N -15 growth-maturity-decline ghtorrent timeseries 
implemented unimplemented f /api/unstable///timeseries/pulls/response_time \N Pull Request Comment Duration growth-maturity-decline "pull-request-comment-duration" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:41 \N -16 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/pulls/comments \N Pull Request Comments growth-maturity-decline "pull-request-comments" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:42 \N -17 growth-maturity-decline augur_db metric implemented unimplemented t /api/unstable/repo-groups//repos//contributors \N Contributors growth-maturity-decline "contributors" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:44 \N -18 growth-maturity-decline githubapi metric implemented unimplemented t /api/unstable///githubapi/contributors \N Contributors growth-maturity-decline "contributors" Insight Worker 0.0.1 githubapi 2019-06-20 22:49:45 \N -19 growth-maturity-decline ghtorrent metric implemented implemented t /api/unstable///contributors \N Contributors growth-maturity-decline "contributors" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:47 \N -20 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/community_engagement \N Community Engagement growth-maturity-decline "community-engagement" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:48 \N -21 growth-maturity-decline augur_db metric implemented unimplemented t /api/unstable/repo-groups//repos//sub-projects \N Sub Projects growth-maturity-decline "sub-projects" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:50 \N -22 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/contribution_acceptance \N Contribution Acceptance growth-maturity-decline "contribution-acceptance" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:51 \N -23 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//code-changes \N Code Changes experimental "code-changes" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:53 \N -24 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//pull-requests-merge-contributor-new \N Pull Requests Merge Contributor New experimental "pull-requests-merge-contributor-new" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:55 \N -25 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issues-first-time-opened \N Issues First Time Opened experimental "issues-first-time-opened" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:56 \N -26 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issues-first-time-closed \N Issues First Time Closed experimental "issues-first-time-closed" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:58 \N -27 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//contributors-new \N Contributors New experimental "contributors-new" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:59 \N -28 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//code-changes-lines \N Code Changes Lines experimental "code-changes-lines" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:01 \N -29 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issues-new \N Issues New experimental "issues-new" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:02 \N -30 experimental augur_db metric implemented unimplemented f 
/api/unstable/repo-groups//repos//issues-closed \N Issues Closed experimental "issues-closed" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:04 \N -31 experimental augur_db metric implemented unimplemented f none \N Issue Duration experimental "issue-duration" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:05 \N -32 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issue-backlog \N Issue Backlog experimental "issue-backlog" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:07 \N -33 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issues-open-age \N Issues Open Age experimental "issues-open-age" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:08 \N -34 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issues-closed-resolution-duration \N Issues Closed Resolution Duration experimental "issues-closed-resolution-duration" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:10 \N -35 experimental augur_db metric implemented unimplemented f none \N Lines Changed By Author experimental "lines-changed-by-author" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:11 \N -36 experimental augur_db git implemented unimplemented f /api/unstable/repo-groups \N Repo Groups experimental "repo-groups" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:13 \N -37 experimental augur_db git implemented unimplemented f /api/unstable/repos \N Downloaded Repos experimental "downloaded-repos" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:15 \N -38 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//open-issues-count \N Open Issues Count experimental "closed-issues-count" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:16 \N -39 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//closed-issues-count \N Closed Issues Count experimental "closed-issues-count" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:18 \N -40 experimental augur_db git implemented unimplemented f /api/unstable/repos// \N Get Repo experimental "get-repo" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:19 \N -41 experimental downloads timeseries implemented implemented f /api/unstable///timeseries/downloads \N Downloads experimental "downloads" Insight Worker 0.0.1 downloads 2019-06-20 22:50:21 \N -42 experimental githubapi metric implemented unimplemented f /api/unstable///githubapi/pull_requests_closed \N Pull Requests Closed experimental "pull_requests_closed" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:22 \N -43 experimental githubapi metric implemented unimplemented f /api/unstable///githubapi/pull_requests_merged \N Pull Requests Merged experimental "pull_requests_merged" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:24 \N -44 experimental githubapi metric implemented unimplemented f /api/unstable///githubapi/pull_requests_open \N Pull Requests Open experimental "pull_requests_open" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:25 \N -45 experimental githubapi metric implemented unimplemented t /api/unstable///githubapi/repository_size \N Repository Size experimental "repository-size" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:27 \N -46 experimental githubapi metric implemented implemented t /api/unstable///bus_factor \N Bus Factor experimental "bus-factor" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:28 \N -47 experimental githubapi timeseries implemented implemented f /api/unstable///timeseries/tags/major \N Major Tags experimental "major-tags" 
Insight Worker 0.0.1 githubapi 2019-06-20 22:50:30 \N -48 experimental githubapi timeseries implemented implemented f /api/unstable///timeseries/tags \N Tags experimental "tags" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:31 \N -49 experimental facade git implemented unimplemented f /api/unstable/git/repos \N Downloaded Repos experimental "downloaded-repos" Insight Worker 0.0.1 facade 2019-06-20 22:50:33 \N -50 experimental facade git implemented implemented f /api/unstable/git/changes_by_author \N Lines Changed By Author experimental "lines-changed-by-author" Insight Worker 0.0.1 facade 2019-06-20 22:50:35 \N -51 experimental facade git implemented unimplemented f /api/unstable/git/lines_changed_by_week \N Lines Changed By Week experimental "lines-changed-by-week" Insight Worker 0.0.1 facade 2019-06-20 22:50:36 \N -52 experimental facade git implemented unimplemented f /api/unstable/git/lines_changed_by_month \N Lines Changed By Month experimental "lines-changed-by-month" Insight Worker 0.0.1 facade 2019-06-20 22:50:38 \N -53 experimental facade git implemented unimplemented f /api/unstable/git/commits_by_week \N Commits By Week experimental "commits-by-week" Insight Worker 0.0.1 facade 2019-06-20 22:50:40 \N -54 experimental facade git implemented implemented f /api/unstable/git/facade_project \N Facade Project experimental "facade-project" Insight Worker 0.0.1 facade 2019-06-20 22:50:41 \N -55 experimental facade metric implemented unimplemented f none \N Annual Commit Count Ranked By New Repo In Repo Group experimental "annual-commit-count-ranked-by-new-repo-in-repo-group" Insight Worker 0.0.1 facade 2019-06-20 22:50:43 \N -56 experimental facade metric implemented unimplemented f none \N Annual Lines Of Code Count Ranked By New Repo In Repo Group experimental "annual-lines-of-code-count-ranked-by-new-repo-in-repo-group" Insight Worker 0.0.1 facade 2019-06-20 22:50:44 \N -57 experimental facade metric implemented unimplemented f none \N Annual Commit Count Ranked By Repo In Repo Group experimental "annual-commit-count-ranked-by-repo-in-repo-group" Insight Worker 0.0.1 facade 2019-06-20 22:50:46 \N -58 experimental facade metric implemented unimplemented f none \N Annual Lines Of Code Count Ranked By Repo In Repo Group experimental "annual-lines-of-code-count-ranked-by-repo-in-repo-group" Insight Worker 0.0.1 facade 2019-06-20 22:50:48 \N -59 experimental facade metric implemented unimplemented f none \N Lines Of Code Commit Counts By Calendar Year Grouped experimental "lines-of-code-commit-counts-by-calendar-year-grouped" Insight Worker 0.0.1 facade 2019-06-20 22:50:49 \N -60 experimental facade metric implemented unimplemented f none \N Unaffiliated Contributors Lines Of Code Commit Counts By Calendar Year Grouped experimental "unaffiliated-contributors-lines-of-code-commit-counts-by-calendar-year-grouped" Insight Worker 0.0.1 facade 2019-06-20 22:50:51 \N -61 experimental facade metric implemented unimplemented f none \N Repo Group Lines Of Code Commit Counts Calendar Year Grouped experimental "repo-group-lines-of-code-commit-counts-calendar-year-grouped" Insight Worker 0.0.1 facade 2019-06-20 22:50:52 \N -62 experimental ghtorrent metric implemented implemented f /api/unstable///contributing_github_organizations \N Contributing Github Organizations experimental "contributing-github-organizations" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:50:54 \N -63 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/new_contributing_github_organizations \N 
New Contributing Github Organizations experimental "new-contributing-github-organizations" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:50:56 \N -64 experimental ghtorrent timeseries implemented implemented t /api/unstable///timeseries/issue_comments \N Issue Comments experimental "issue-comments" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:50:57 \N -65 experimental ghtorrent timeseries implemented implemented t /api/unstable///timeseries/pulls/made_closed \N Pull Requests Made Closed experimental "pull-requests-made-closed" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:50:59 \N -66 experimental ghtorrent timeseries implemented implemented t /api/unstable///timeseries/watchers \N Watchers experimental "watchers" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:00 \N -67 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/commits100 \N Commits100 experimental "commits100" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:02 \N -68 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/commits/comments \N Commit Comments experimental "commit-comments" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:03 \N -69 experimental ghtorrent metric implemented implemented f /api/unstable///committer_locations \N Committer Locations experimental "committer-locations" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:05 \N -70 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/total_committers \N Total Committers experimental "total-committers" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:07 \N -71 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/issues/activity \N Issue Activity experimental "issue-activity" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:08 \N -72 experimental ghtorrent timeseries implemented unimplemented f /api/unstable///timeseries/pulls/acceptance_rate \N Pull Request Acceptance Rate experimental "pull-request-acceptance-rate" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:10 \N -73 experimental ghtorrent metric implemented implemented f /api/unstable///community_age \N Community Age experimental "community-age" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:11 \N -74 experimental ghtorrent metric implemented unimplemented f /api/unstable///timeseries/contributions \N Contributions experimental "contributions" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:13 \N -75 experimental ghtorrent metric implemented implemented f /api/unstable///project_age \N Project Age experimental "project-age" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:14 \N -76 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/fakes \N Fakes experimental "fakes" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:16 \N -77 experimental ghtorrent timeseries implemented unimplemented f /api/unstable///timeseries/total_watchers \N Total Watchers experimental "total-watchers" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:18 \N -78 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/new_watchers \N New Watchers experimental "new-watchers" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:19 \N -79 experimental librariesio metric implemented implemented f /api/unstable///dependencies \N Dependencies experimental "dependencies" Insight Worker 0.0.1 librariesio 2019-06-20 22:51:21 \N -80 experimental librariesio metric implemented implemented f /api/unstable///dependency_stats \N Dependency Stats experimental 
"dependency-stats" Insight Worker 0.0.1 librariesio 2019-06-20 22:51:23 \N -81 experimental librariesio metric implemented implemented f /api/unstable///dependents \N Dependents experimental "dependents" Insight Worker 0.0.1 librariesio 2019-06-20 22:51:25 \N -\. - - --- --- Data for Name: chaoss_user; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.chaoss_user (chaoss_id, chaoss_login_name, chaoss_login_hashword, chaoss_email, chaoss_text_phone, chaoss_first_name, chaoss_last_name, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: commit_comment_ref; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.commit_comment_ref (cmt_comment_id, cmt_id, repo_id, msg_id, user_id, body, line, "position", commit_comment_src_node_id, cmt_comment_src_id, created_at, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: commit_parents; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.commit_parents (cmt_id, parent_id, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: commits; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.commits (cmt_id, repo_id, cmt_commit_hash, cmt_author_name, cmt_author_raw_email, cmt_author_email, cmt_author_date, cmt_author_affiliation, cmt_committer_name, cmt_committer_raw_email, cmt_committer_email, cmt_committer_date, cmt_committer_affiliation, cmt_added, cmt_removed, cmt_whitespace, cmt_filename, cmt_date_attempted, cmt_ght_committer_id, cmt_ght_committed_at, cmt_committer_timestamp, cmt_author_timestamp, cmt_author_platform_username, tool_source, tool_version, data_source, data_collection_date, cmt_ght_author_id) FROM stdin; -\. - - --- --- Data for Name: contributor_affiliations; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.contributor_affiliations (ca_id, ca_domain, ca_start_date, ca_last_used, ca_affiliation, ca_active, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -1 samsung.com 1970-01-01 2018-08-01 18:37:54 Samsung 1 load 1.0 load 1970-01-01 00:00:00 -2 linuxfoundation.org 1970-01-01 2018-08-01 18:37:54 Linux Foundation 1 load 1.0 load 1970-01-01 00:00:00 -3 ibm.com 1970-01-01 2018-08-01 18:37:54 IBM 1 load 1.0 load 1970-01-01 00:00:00 -8 walmart.com 1970-01-01 2018-09-01 06:00:00 Walmart 1 load 1.0 load 1970-01-01 00:00:00 -9 exxonmobil.com 1970-01-01 2018-09-01 06:00:00 Exxon Mobil 1 load 1.0 load 1970-01-01 00:00:00 -10 ge.com 1970-01-01 2018-09-01 06:00:00 General Electric 1 load 1.0 load 1970-01-01 00:00:00 -11 dupont.com 1970-01-01 2018-09-01 06:00:00 DuPont 1 load 1.0 load 1970-01-01 00:00:00 -12 avnet.com 1970-01-01 2018-09-01 06:00:00 Avnet 1 load 1.0 load 1970-01-01 00:00:00 -13 macysinc.com 1970-01-01 2018-09-01 06:00:00 Macys 1 load 1.0 load 1970-01-01 00:00:00 -14 enterpriseproducts.com 1970-01-01 2018-09-01 06:00:00 Enterprise Products Partners 1 load 1.0 load 1970-01-01 00:00:00 -15 travelers.com 1970-01-01 2018-09-01 06:00:00 Travelers Cos. 
1 load 1.0 load 1970-01-01 00:00:00 -16 pmi.com 1970-01-01 2018-09-01 06:00:00 Philip Morris International 1 load 1.0 load 1970-01-01 00:00:00 -17 riteaid.com 1970-01-01 2018-09-01 06:00:00 Rite Aid 1 load 1.0 load 1970-01-01 00:00:00 -18 techdata.com 1970-01-01 2018-09-01 06:00:00 Tech Data 1 load 1.0 load 1970-01-01 00:00:00 -25156 pivotal.io 1970-01-01 2020-03-25 00:30:57 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:30:57 -25157 vmware.com 1970-01-01 2020-03-25 00:33:35 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:33:35 -25158 rabbitmq.com 1970-01-01 2020-03-25 00:33:43 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:33:43 -25161 pivotallabs.com 1970-01-01 2020-03-25 00:43:53 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:43:53 -25162 cloudcredo.com 1970-01-01 2020-03-25 00:44:18 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:44:18 -25163 gopivotal.com 1970-01-01 2020-03-25 00:44:25 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:44:25 -25164 heptio.com 1970-01-01 2020-03-25 00:44:32 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:44:32 -19 aboutmcdonalds.com 1970-01-01 2018-09-01 06:00:00 McDonalds 1 load 1.0 load 1970-01-01 00:00:00 -20 qualcomm.com 1970-01-01 2018-09-01 06:00:00 Qualcomm 1 load 1.0 load 1970-01-01 00:00:00 -21 amerisourcebergen.com 1970-01-01 2018-09-01 06:00:00 AmerisourceBergen 1 load 1.0 load 1970-01-01 00:00:00 -22 searsholdings.com 1970-01-01 2018-09-01 06:00:00 Sears Holdings 1 load 1.0 load 1970-01-01 00:00:00 -23 capitalone.com 1970-01-01 2018-09-01 06:00:00 Capital One Financial 1 load 1.0 load 1970-01-01 00:00:00 -24 emc.com 1970-01-01 2018-09-01 06:00:00 EMC 1 load 1.0 load 1970-01-01 00:00:00 -25 usaa.com 1970-01-01 2018-09-01 06:00:00 USAA 1 load 1.0 load 1970-01-01 00:00:00 -26 duke-energy.com 1970-01-01 2018-09-01 06:00:00 Duke Energy 1 load 1.0 load 1970-01-01 00:00:00 -27 twc.com 1970-01-01 2018-09-01 06:00:00 Time Warner Cable 1 load 1.0 load 1970-01-01 00:00:00 -28 halliburton.com 1970-01-01 2018-09-01 06:00:00 Halliburton 1 load 1.0 load 1970-01-01 00:00:00 -29 northropgrumman.com 1970-01-01 2018-09-01 06:00:00 Northrop Grumman 1 load 1.0 load 1970-01-01 00:00:00 -30 arrow.com 1970-01-01 2018-09-01 06:00:00 Arrow Electronics 1 load 1.0 load 1970-01-01 00:00:00 -31 raytheon.com 1970-01-01 2018-09-01 06:00:00 Raytheon 1 load 1.0 load 1970-01-01 00:00:00 -32 verizon.com 1970-01-01 2018-09-01 06:00:00 Verizon 1 load 1.0 load 1970-01-01 00:00:00 -33 plainsallamerican.com 1970-01-01 2018-09-01 06:00:00 Plains GP Holdings 1 load 1.0 load 1970-01-01 00:00:00 -34 usfoods.com 1970-01-01 2018-09-01 06:00:00 US Foods 1 load 1.0 load 1970-01-01 00:00:00 -35 abbvie.com 1970-01-01 2018-09-01 06:00:00 AbbVie 1 load 1.0 load 1970-01-01 00:00:00 -36 centene.com 1970-01-01 2018-09-01 06:00:00 Centene 1 load 1.0 load 1970-01-01 00:00:00 -37 chs.net 1970-01-01 2018-09-01 06:00:00 Community Health Systems 1 load 1.0 load 1970-01-01 00:00:00 -38 arconic.com 1970-01-01 2018-09-01 06:00:00 Arconic 1 load 1.0 load 1970-01-01 00:00:00 -39 internationalpaper.com 1970-01-01 2018-09-01 06:00:00 International Paper 1 load 1.0 load 1970-01-01 00:00:00 -40 emerson.com 1970-01-01 2018-09-01 06:00:00 Emerson Electric 1 load 1.0 load 1970-01-01 00:00:00 -41 up.com 1970-01-01 2018-09-01 06:00:00 Union Pacific 1 load 1.0 load 1970-01-01 00:00:00 -42 amgen.com 1970-01-01 2018-09-01 06:00:00 Amgen 1 load 1.0 load 1970-01-01 00:00:00 -43 chevron.com 1970-01-01 2018-09-01 06:00:00 Chevron 1 load 1.0 load 1970-01-01 00:00:00 -44 usbank.com 1970-01-01 2018-09-01 06:00:00 U.S. 
Bancorp 1 load 1.0 load 1970-01-01 00:00:00 -45 staples.com 1970-01-01 2018-09-01 06:00:00 Staples 1 load 1.0 load 1970-01-01 00:00:00 -46 danaher.com 1970-01-01 2018-09-01 06:00:00 Danaher 1 load 1.0 load 1970-01-01 00:00:00 -47 whirlpoolcorp.com 1970-01-01 2018-09-01 06:00:00 Whirlpool 1 load 1.0 load 1970-01-01 00:00:00 -48 aflac.com 1970-01-01 2018-09-01 06:00:00 Aflac 1 load 1.0 load 1970-01-01 00:00:00 -49 autonation.com 1970-01-01 2018-09-01 06:00:00 AutoNation 1 load 1.0 load 1970-01-01 00:00:00 -50 progressive.com 1970-01-01 2018-09-01 06:00:00 Progressive 1 load 1.0 load 1970-01-01 00:00:00 -51 abbott.com 1970-01-01 2018-09-01 06:00:00 Abbott Laboratories 1 load 1.0 load 1970-01-01 00:00:00 -52 dollargeneral.com 1970-01-01 2018-09-01 06:00:00 Dollar General 1 load 1.0 load 1970-01-01 00:00:00 -53 tenethealth.com 1970-01-01 2018-09-01 06:00:00 Tenet Healthcare 1 load 1.0 load 1970-01-01 00:00:00 -54 costco.com 1970-01-01 2018-09-01 06:00:00 Costco 1 load 1.0 load 1970-01-01 00:00:00 -55 lilly.com 1970-01-01 2018-09-01 06:00:00 Eli Lilly 1 load 1.0 load 1970-01-01 00:00:00 -56 southwest.com 1970-01-01 2018-09-01 06:00:00 Southwest Airlines 1 load 1.0 load 1970-01-01 00:00:00 -57 penskeautomotive.com 1970-01-01 2018-09-01 06:00:00 Penske Automotive Group 1 load 1.0 load 1970-01-01 00:00:00 -58 manpowergroup.com 1970-01-01 2018-09-01 06:00:00 ManpowerGroup 1 load 1.0 load 1970-01-01 00:00:00 -59 kohlscorporation.com 1970-01-01 2018-09-01 06:00:00 Kohls 1 load 1.0 load 1970-01-01 00:00:00 -60 starbucks.com 1970-01-01 2018-09-01 06:00:00 Starbucks 1 load 1.0 load 1970-01-01 00:00:00 -61 paccar.com 1970-01-01 2018-09-01 06:00:00 Paccar 1 load 1.0 load 1970-01-01 00:00:00 -62 cummins.com 1970-01-01 2018-09-01 06:00:00 Cummins 1 load 1.0 load 1970-01-01 00:00:00 -63 altria.com 1970-01-01 2018-09-01 06:00:00 Altria Group 1 load 1.0 load 1970-01-01 00:00:00 -64 xerox.com 1970-01-01 2018-09-01 06:00:00 Xerox 1 load 1.0 load 1970-01-01 00:00:00 -65 fanniemae.com 1970-01-01 2018-09-01 06:00:00 Fannie Mae 1 load 1.0 load 1970-01-01 00:00:00 -66 kimberly-clark.com 1970-01-01 2018-09-01 06:00:00 Kimberly-Clark 1 load 1.0 load 1970-01-01 00:00:00 -67 thehartford.com 1970-01-01 2018-09-01 06:00:00 Hartford Financial Services Group 1 load 1.0 load 1970-01-01 00:00:00 -68 kraftheinzcompany.com 1970-01-01 2018-09-01 06:00:00 Kraft Heinz 1 load 1.0 load 1970-01-01 00:00:00 -69 lear.com 1970-01-01 2018-09-01 06:00:00 Lear 1 load 1.0 load 1970-01-01 00:00:00 -70 fluor.com 1970-01-01 2018-09-01 06:00:00 Fluor 1 load 1.0 load 1970-01-01 00:00:00 -71 aecom.com 1970-01-01 2018-09-01 06:00:00 AECOM 1 load 1.0 load 1970-01-01 00:00:00 -72 facebook.com 1970-01-01 2018-09-01 06:00:00 Facebook 1 load 1.0 load 1970-01-01 00:00:00 -73 jabil.com 1970-01-01 2018-09-01 06:00:00 Jabil Circuit 1 load 1.0 load 1970-01-01 00:00:00 -74 centurylink.com 1970-01-01 2018-09-01 06:00:00 CenturyLink 1 load 1.0 load 1970-01-01 00:00:00 -75 supervalu.com 1970-01-01 2018-09-01 06:00:00 Supervalu 1 load 1.0 load 1970-01-01 00:00:00 -76 thekrogerco.com 1970-01-01 2018-09-01 06:00:00 Kroger 1 load 1.0 load 1970-01-01 00:00:00 -77 generalmills.com 1970-01-01 2018-09-01 06:00:00 General Mills 1 load 1.0 load 1970-01-01 00:00:00 -78 southerncompany.com 1970-01-01 2018-09-01 06:00:00 Southern 1 load 1.0 load 1970-01-01 00:00:00 -79 nexteraenergy.com 1970-01-01 2018-09-01 06:00:00 NextEra Energy 1 load 1.0 load 1970-01-01 00:00:00 -80 thermofisher.com 1970-01-01 2018-09-01 06:00:00 Thermo Fisher Scientific 1 load 1.0 load 1970-01-01 
00:00:00 -81 aep.com 1970-01-01 2018-09-01 06:00:00 American Electric Power 1 load 1.0 load 1970-01-01 00:00:00 -82 pge.com 1970-01-01 2018-09-01 06:00:00 PG&E Corp. 1 load 1.0 load 1970-01-01 00:00:00 -83 nglenergypartners.com 1970-01-01 2018-09-01 06:00:00 NGL Energy Partners 1 load 1.0 load 1970-01-01 00:00:00 -84 bms.com 1970-01-01 2018-09-01 06:00:00 Bristol-Myers Squibb 1 load 1.0 load 1970-01-01 00:00:00 -85 goodyear.com 1970-01-01 2018-09-01 06:00:00 Goodyear Tire & Rubber 1 load 1.0 load 1970-01-01 00:00:00 -86 nucor.com 1970-01-01 2018-09-01 06:00:00 Nucor 1 load 1.0 load 1970-01-01 00:00:00 -87 amazon.com 1970-01-01 2018-09-01 06:00:00 Amazon.com 1 load 1.0 load 1970-01-01 00:00:00 -88 pnc.com 1970-01-01 2018-09-01 06:00:00 PNC Financial Services Group 1 load 1.0 load 1970-01-01 00:00:00 -89 healthnet.com 1970-01-01 2018-09-01 06:00:00 Health Net 1 load 1.0 load 1970-01-01 00:00:00 -90 micron.com 1970-01-01 2018-09-01 06:00:00 Micron Technology 1 load 1.0 load 1970-01-01 00:00:00 -91 colgatepalmolive.com 1970-01-01 2018-09-01 06:00:00 Colgate-Palmolive 1 load 1.0 load 1970-01-01 00:00:00 -92 fcx.com 1970-01-01 2018-09-01 06:00:00 Freeport-McMoRan 1 load 1.0 load 1970-01-01 00:00:00 -93 conagrafoods.com 1970-01-01 2018-09-01 06:00:00 ConAgra Foods 1 load 1.0 load 1970-01-01 00:00:00 -94 gapinc.com 1970-01-01 2018-09-01 06:00:00 Gap 1 load 1.0 load 1970-01-01 00:00:00 -95 bakerhughes.com 1970-01-01 2018-09-01 06:00:00 Baker Hughes 1 load 1.0 load 1970-01-01 00:00:00 -96 bnymellon.com 1970-01-01 2018-09-01 06:00:00 Bank of New York Mellon Corp. 1 load 1.0 load 1970-01-01 00:00:00 -97 dollartree.com 1970-01-01 2018-09-01 06:00:00 Dollar Tree 1 load 1.0 load 1970-01-01 00:00:00 -98 walgreensbootsalliance.com 1970-01-01 2018-09-01 06:00:00 Walgreens 1 load 1.0 load 1970-01-01 00:00:00 -99 wholefoodsmarket.com 1970-01-01 2018-09-01 06:00:00 Whole Foods Market 1 load 1.0 load 1970-01-01 00:00:00 -100 ppg.com 1970-01-01 2018-09-01 06:00:00 PPG Industries 1 load 1.0 load 1970-01-01 00:00:00 -101 genpt.com 1970-01-01 2018-09-01 06:00:00 Genuine Parts 1 load 1.0 load 1970-01-01 00:00:00 -102 ielp.com 1970-01-01 2018-09-01 06:00:00 Icahn Enterprises 1 load 1.0 load 1970-01-01 00:00:00 -103 pfgc.com 1970-01-01 2018-09-01 06:00:00 Performance Food Group 1 load 1.0 load 1970-01-01 00:00:00 -104 omnicomgroup.com 1970-01-01 2018-09-01 06:00:00 Omnicom Group 1 load 1.0 load 1970-01-01 00:00:00 -105 dish.com 1970-01-01 2018-09-01 06:00:00 DISH Network 1 load 1.0 load 1970-01-01 00:00:00 -106 firstenergycorp.com 1970-01-01 2018-09-01 06:00:00 FirstEnergy 1 load 1.0 load 1970-01-01 00:00:00 -107 monsanto.com 1970-01-01 2018-09-01 06:00:00 Monsanto 1 load 1.0 load 1970-01-01 00:00:00 -108 aes.com 1970-01-01 2018-09-01 06:00:00 AES 1 load 1.0 load 1970-01-01 00:00:00 -109 hp.com 1970-01-01 2018-09-01 06:00:00 HP 1 load 1.0 load 1970-01-01 00:00:00 -110 carmax.com 1970-01-01 2018-09-01 06:00:00 CarMax 1 load 1.0 load 1970-01-01 00:00:00 -111 nov.com 1970-01-01 2018-09-01 06:00:00 National Oilwell Varco 1 load 1.0 load 1970-01-01 00:00:00 -112 nrgenergy.com 1970-01-01 2018-09-01 06:00:00 NRG Energy 1 load 1.0 load 1970-01-01 00:00:00 -113 westerndigital.com 1970-01-01 2018-09-01 06:00:00 Western Digital 1 load 1.0 load 1970-01-01 00:00:00 -114 marriott.com 1970-01-01 2018-09-01 06:00:00 Marriott International 1 load 1.0 load 1970-01-01 00:00:00 -115 officedepot.com 1970-01-01 2018-09-01 06:00:00 Office Depot 1 load 1.0 load 1970-01-01 00:00:00 -116 nordstrom.com 1970-01-01 2018-09-01 06:00:00 Nordstrom 1 
load 1.0 load 1970-01-01 00:00:00 -117 kindermorgan.com 1970-01-01 2018-09-01 06:00:00 Kinder Morgan 1 load 1.0 load 1970-01-01 00:00:00 -118 aramark.com 1970-01-01 2018-09-01 06:00:00 Aramark 1 load 1.0 load 1970-01-01 00:00:00 -119 davita.com 1970-01-01 2018-09-01 06:00:00 DaVita 1 load 1.0 load 1970-01-01 00:00:00 -120 apple.com 1970-01-01 2018-09-01 06:00:00 Apple 1 load 1.0 load 1970-01-01 00:00:00 -121 cardinal.com 1970-01-01 2018-09-01 06:00:00 Cardinal Health 1 load 1.0 load 1970-01-01 00:00:00 -122 molinahealthcare.com 1970-01-01 2018-09-01 06:00:00 Molina Healthcare 1 load 1.0 load 1970-01-01 00:00:00 -123 wellcare.com 1970-01-01 2018-09-01 06:00:00 WellCare Health Plans 1 load 1.0 load 1970-01-01 00:00:00 -124 cbscorporation.com 1970-01-01 2018-09-01 06:00:00 CBS 1 load 1.0 load 1970-01-01 00:00:00 -125 visa.com 1970-01-01 2018-09-01 06:00:00 Visa 1 load 1.0 load 1970-01-01 00:00:00 -126 lfg.com 1970-01-01 2018-09-01 06:00:00 Lincoln National 1 load 1.0 load 1970-01-01 00:00:00 -127 ecolab.com 1970-01-01 2018-09-01 06:00:00 Ecolab 1 load 1.0 load 1970-01-01 00:00:00 -128 kelloggcompany.com 1970-01-01 2018-09-01 06:00:00 Kellogg 1 load 1.0 load 1970-01-01 00:00:00 -129 chrobinson.com 1970-01-01 2018-09-01 06:00:00 C.H. Robinson Worldwide 1 load 1.0 load 1970-01-01 00:00:00 -130 textron.com 1970-01-01 2018-09-01 06:00:00 Textron 1 load 1.0 load 1970-01-01 00:00:00 -131 loews.com 1970-01-01 2018-09-01 06:00:00 Loews 1 load 1.0 load 1970-01-01 00:00:00 -132 express-scripts.com 1970-01-01 2018-09-01 06:00:00 Express Scripts Holding 1 load 1.0 load 1970-01-01 00:00:00 -133 itw.com 1970-01-01 2018-09-01 06:00:00 Illinois Tool Works 1 load 1.0 load 1970-01-01 00:00:00 -134 synnex.com 1970-01-01 2018-09-01 06:00:00 Synnex 1 load 1.0 load 1970-01-01 00:00:00 -135 viacom.com 1970-01-01 2018-09-01 06:00:00 Viacom 1 load 1.0 load 1970-01-01 00:00:00 -136 hollyfrontier.com 1970-01-01 2018-09-01 06:00:00 HollyFrontier 1 load 1.0 load 1970-01-01 00:00:00 -137 landolakesinc.com 1970-01-01 2018-09-01 06:00:00 Land O Lakes 1 load 1.0 load 1970-01-01 00:00:00 -138 devonenergy.com 1970-01-01 2018-09-01 06:00:00 Devon Energy 1 load 1.0 load 1970-01-01 00:00:00 -139 pbfenergy.com 1970-01-01 2018-09-01 06:00:00 PBF Energy 1 load 1.0 load 1970-01-01 00:00:00 -140 yum.com 1970-01-01 2018-09-01 06:00:00 Yum Brands 1 load 1.0 load 1970-01-01 00:00:00 -141 ti.com 1970-01-01 2018-09-01 06:00:00 Texas Instruments 1 load 1.0 load 1970-01-01 00:00:00 -142 cdw.com 1970-01-01 2018-09-01 06:00:00 CDW 1 load 1.0 load 1970-01-01 00:00:00 -143 jpmorganchase.com 1970-01-01 2018-09-01 06:00:00 J.P. Morgan Chase 1 load 1.0 load 1970-01-01 00:00:00 -144 wm.com 1970-01-01 2018-09-01 06:00:00 Waste Management 1 load 1.0 load 1970-01-01 00:00:00 -145 mmc.com 1970-01-01 2018-09-01 06:00:00 Marsh & McLennan 1 load 1.0 load 1970-01-01 00:00:00 -146 chk.com 1970-01-01 2018-09-01 06:00:00 Chesapeake Energy 1 load 1.0 load 1970-01-01 00:00:00 -147 parker.com 1970-01-01 2018-09-01 06:00:00 Parker-Hannifin 1 load 1.0 load 1970-01-01 00:00:00 -148 oxy.com 1970-01-01 2018-09-01 06:00:00 Occidental Petroleum 1 load 1.0 load 1970-01-01 00:00:00 -149 guardianlife.com 1970-01-01 2018-09-01 06:00:00 Guardian Life Ins. Co. of America 1 load 1.0 load 1970-01-01 00:00:00 -150 farmers.com 1970-01-01 2018-09-01 06:00:00 Farmers Insurance Exchange 1 load 1.0 load 1970-01-01 00:00:00 -151 jcpenney.com 1970-01-01 2018-09-01 06:00:00 J.C. 
Penney 1 load 1.0 load 1970-01-01 00:00:00 -152 conedison.com 1970-01-01 2018-09-01 06:00:00 Consolidated Edison 1 load 1.0 load 1970-01-01 00:00:00 -153 cognizant.com 1970-01-01 2018-09-01 06:00:00 Cognizant Technology Solutions 1 load 1.0 load 1970-01-01 00:00:00 -154 boeing.com 1970-01-01 2018-09-01 06:00:00 Boeing 1 load 1.0 load 1970-01-01 00:00:00 -155 vfc.com 1970-01-01 2018-09-01 06:00:00 VF 1 load 1.0 load 1970-01-01 00:00:00 -156 ameriprise.com 1970-01-01 2018-09-01 06:00:00 Ameriprise Financial 1 load 1.0 load 1970-01-01 00:00:00 -157 csc.com 1970-01-01 2018-09-01 06:00:00 Computer Sciences 1 load 1.0 load 1970-01-01 00:00:00 -158 lb.com 1970-01-01 2018-09-01 06:00:00 L Brands 1 load 1.0 load 1970-01-01 00:00:00 -159 jacobs.com 1970-01-01 2018-09-01 06:00:00 Jacobs Engineering Group 1 load 1.0 load 1970-01-01 00:00:00 -160 principal.com 1970-01-01 2018-09-01 06:00:00 Principal Financial Group 1 load 1.0 load 1970-01-01 00:00:00 -161 rossstores.com 1970-01-01 2018-09-01 06:00:00 Ross Stores 1 load 1.0 load 1970-01-01 00:00:00 -162 bedbathandbeyond.com 1970-01-01 2018-09-01 06:00:00 Bed Bath & Beyond 1 load 1.0 load 1970-01-01 00:00:00 -163 csx.com 1970-01-01 2018-09-01 06:00:00 CSX 1 load 1.0 load 1970-01-01 00:00:00 -164 toysrusinc.com 1970-01-01 2018-09-01 06:00:00 Toys \\"R\\" Us 1 load 1.0 load 1970-01-01 00:00:00 -165 microsoft.com 1970-01-01 2018-09-01 06:00:00 Microsoft 1 load 1.0 load 1970-01-01 00:00:00 -166 sands.com 1970-01-01 2018-09-01 06:00:00 Las Vegas Sands 1 load 1.0 load 1970-01-01 00:00:00 -167 leucadia.com 1970-01-01 2018-09-01 06:00:00 Leucadia National 1 load 1.0 load 1970-01-01 00:00:00 -168 dom.com 1970-01-01 2018-09-01 06:00:00 Dominion Resources 1 load 1.0 load 1970-01-01 00:00:00 -169 ussteel.com 1970-01-01 2018-09-01 06:00:00 United States Steel 1 load 1.0 load 1970-01-01 00:00:00 -170 l-3com.com 1970-01-01 2018-09-01 06:00:00 L-3 Communications 1 load 1.0 load 1970-01-01 00:00:00 -171 edisoninvestor.com 1970-01-01 2018-09-01 06:00:00 Edison International 1 load 1.0 load 1970-01-01 00:00:00 -172 entergy.com 1970-01-01 2018-09-01 06:00:00 Entergy 1 load 1.0 load 1970-01-01 00:00:00 -173 adp.com 1970-01-01 2018-09-01 06:00:00 ADP 1 load 1.0 load 1970-01-01 00:00:00 -174 firstdata.com 1970-01-01 2018-09-01 06:00:00 First Data 1 load 1.0 load 1970-01-01 00:00:00 -175 blackrock.com 1970-01-01 2018-09-01 06:00:00 BlackRock 1 load 1.0 load 1970-01-01 00:00:00 -176 bankofamerica.com 1970-01-01 2018-09-01 06:00:00 Bank of America Corp. 1 load 1.0 load 1970-01-01 00:00:00 -177 westrock.com 1970-01-01 2018-09-01 06:00:00 WestRock 1 load 1.0 load 1970-01-01 00:00:00 -178 voya.com 1970-01-01 2018-09-01 06:00:00 Voya Financial 1 load 1.0 load 1970-01-01 00:00:00 -179 sherwin.com 1970-01-01 2018-09-01 06:00:00 Sherwin-Williams 1 load 1.0 load 1970-01-01 00:00:00 -180 hiltonworldwide.com 1970-01-01 2018-09-01 06:00:00 Hilton Worldwide Holdings 1 load 1.0 load 1970-01-01 00:00:00 -181 rrdonnelley.com 1970-01-01 2018-09-01 06:00:00 R.R. Donnelley & Sons 1 load 1.0 load 1970-01-01 00:00:00 -182 stanleyblackanddecker.com 1970-01-01 2018-09-01 06:00:00 Stanley Black & Decker 1 load 1.0 load 1970-01-01 00:00:00 -183 xcelenergy.com 1970-01-01 2018-09-01 06:00:00 Xcel Energy 1 load 1.0 load 1970-01-01 00:00:00 -184 corporate.murphyusa.com 1970-01-01 2018-09-01 06:00:00 Murphy USA 1 load 1.0 load 1970-01-01 00:00:00 -185 cbre.com 1970-01-01 2018-09-01 06:00:00 CBRE Group 1 load 1.0 load 1970-01-01 00:00:00 -186 drhorton.com 1970-01-01 2018-09-01 06:00:00 D.R. 
Horton 1 load 1.0 load 1970-01-01 00:00:00 -187 wellsfargo.com 1970-01-01 2018-09-01 06:00:00 Wells Fargo 1 load 1.0 load 1970-01-01 00:00:00 -188 elcompanies.com 1970-01-01 2018-09-01 06:00:00 Estee Lauder 1 load 1.0 load 1970-01-01 00:00:00 -189 praxair.com 1970-01-01 2018-09-01 06:00:00 Praxair 1 load 1.0 load 1970-01-01 00:00:00 -190 biogen.com 1970-01-01 2018-09-01 06:00:00 Biogen 1 load 1.0 load 1970-01-01 00:00:00 -191 statestreet.com 1970-01-01 2018-09-01 06:00:00 State Street Corp. 1 load 1.0 load 1970-01-01 00:00:00 -192 unum.com 1970-01-01 2018-09-01 06:00:00 Unum Group 1 load 1.0 load 1970-01-01 00:00:00 -193 reynoldsamerican.com 1970-01-01 2018-09-01 06:00:00 Reynolds American 1 load 1.0 load 1970-01-01 00:00:00 -194 group1auto.com 1970-01-01 2018-09-01 06:00:00 Group 1 Automotive 1 load 1.0 load 1970-01-01 00:00:00 -195 henryschein.com 1970-01-01 2018-09-01 06:00:00 Henry Schein 1 load 1.0 load 1970-01-01 00:00:00 -196 hertz.com 1970-01-01 2018-09-01 06:00:00 Hertz Global Holdings 1 load 1.0 load 1970-01-01 00:00:00 -197 nscorp.com 1970-01-01 2018-09-01 06:00:00 Norfolk Southern 1 load 1.0 load 1970-01-01 00:00:00 -198 homedepot.com 1970-01-01 2018-09-01 06:00:00 Home Depot 1 load 1.0 load 1970-01-01 00:00:00 -199 rgare.com 1970-01-01 2018-09-01 06:00:00 Reinsurance Group of America 1 load 1.0 load 1970-01-01 00:00:00 -200 pseg.com 1970-01-01 2018-09-01 06:00:00 Public Service Enterprise Group 1 load 1.0 load 1970-01-01 00:00:00 -201 bbt.com 1970-01-01 2018-09-01 06:00:00 BB&T Corp. 1 load 1.0 load 1970-01-01 00:00:00 -202 dteenergy.com 1970-01-01 2018-09-01 06:00:00 DTE Energy 1 load 1.0 load 1970-01-01 00:00:00 -203 assurant.com 1970-01-01 2018-09-01 06:00:00 Assurant 1 load 1.0 load 1970-01-01 00:00:00 -204 globalp.com 1970-01-01 2018-09-01 06:00:00 Global Partners 1 load 1.0 load 1970-01-01 00:00:00 -205 huntsman.com 1970-01-01 2018-09-01 06:00:00 Huntsman 1 load 1.0 load 1970-01-01 00:00:00 -206 bd.com 1970-01-01 2018-09-01 06:00:00 Becton Dickinson 1 load 1.0 load 1970-01-01 00:00:00 -207 sempra.com 1970-01-01 2018-09-01 06:00:00 Sempra Energy 1 load 1.0 load 1970-01-01 00:00:00 -208 autozone.com 1970-01-01 2018-09-01 06:00:00 AutoZone 1 load 1.0 load 1970-01-01 00:00:00 -209 citigroup.com 1970-01-01 2018-09-01 06:00:00 Citigroup 1 load 1.0 load 1970-01-01 00:00:00 -210 navistar.com 1970-01-01 2018-09-01 06:00:00 Navistar International 1 load 1.0 load 1970-01-01 00:00:00 -211 precast.com 1970-01-01 2018-09-01 06:00:00 Precision Castparts 1 load 1.0 load 1970-01-01 00:00:00 -212 discoverfinancial.com 1970-01-01 2018-09-01 06:00:00 Discover Financial Services 1 load 1.0 load 1970-01-01 00:00:00 -213 libertyinteractive.com 1970-01-01 2018-09-01 06:00:00 Liberty Interactive 1 load 1.0 load 1970-01-01 00:00:00 -214 grainger.com 1970-01-01 2018-09-01 06:00:00 W.W. 
Grainger 1 load 1.0 load 1970-01-01 00:00:00 -215 baxter.com 1970-01-01 2018-09-01 06:00:00 Baxter International 1 load 1.0 load 1970-01-01 00:00:00 -216 stryker.com 1970-01-01 2018-09-01 06:00:00 Stryker 1 load 1.0 load 1970-01-01 00:00:00 -217 airproducts.com 1970-01-01 2018-09-01 06:00:00 Air Products & Chemicals 1 load 1.0 load 1970-01-01 00:00:00 -218 wnr.com 1970-01-01 2018-09-01 06:00:00 Western Refining 1 load 1.0 load 1970-01-01 00:00:00 -219 uhsinc.com 1970-01-01 2018-09-01 06:00:00 Universal Health Services 1 load 1.0 load 1970-01-01 00:00:00 -220 phillips66.com 1970-01-01 2018-09-01 06:00:00 Phillips 66 1 load 1.0 load 1970-01-01 00:00:00 -221 owens-minor.com 1970-01-01 2018-09-01 06:00:00 Owens & Minor 1 load 1.0 load 1970-01-01 00:00:00 -222 charter.com 1970-01-01 2018-09-01 06:00:00 Charter Communications 1 load 1.0 load 1970-01-01 00:00:00 -223 advanceautoparts.com 1970-01-01 2018-09-01 06:00:00 Advance Auto Parts 1 load 1.0 load 1970-01-01 00:00:00 -224 mastercard.com 1970-01-01 2018-09-01 06:00:00 MasterCard 1 load 1.0 load 1970-01-01 00:00:00 -225 appliedmaterials.com 1970-01-01 2018-09-01 06:00:00 Applied Materials 1 load 1.0 load 1970-01-01 00:00:00 -226 eastman.com 1970-01-01 2018-09-01 06:00:00 Eastman Chemical 1 load 1.0 load 1970-01-01 00:00:00 -227 sonicautomotive.com 1970-01-01 2018-09-01 06:00:00 Sonic Automotive 1 load 1.0 load 1970-01-01 00:00:00 -228 ally.com 1970-01-01 2018-09-01 06:00:00 Ally Financial 1 load 1.0 load 1970-01-01 00:00:00 -229 cstbrands.com 1970-01-01 2018-09-01 06:00:00 CST Brands 1 load 1.0 load 1970-01-01 00:00:00 -230 ebay.com 1970-01-01 2018-09-01 06:00:00 eBay 1 load 1.0 load 1970-01-01 00:00:00 -231 berkshirehathaway.com 1970-01-01 2018-09-01 06:00:00 Berkshire Hathaway 1 load 1.0 load 1970-01-01 00:00:00 -233 lennar.com 1970-01-01 2018-09-01 06:00:00 Lennar 1 load 1.0 load 1970-01-01 00:00:00 -234 gamestopcorp.com 1970-01-01 2018-09-01 06:00:00 GameStop 1 load 1.0 load 1970-01-01 00:00:00 -235 rsac.com 1970-01-01 2018-09-01 06:00:00 Reliance Steel & Aluminum 1 load 1.0 load 1970-01-01 00:00:00 -236 hormelfoods.com 1970-01-01 2018-09-01 06:00:00 Hormel Foods 1 load 1.0 load 1970-01-01 00:00:00 -237 celgene.com 1970-01-01 2018-09-01 06:00:00 Celgene 1 load 1.0 load 1970-01-01 00:00:00 -238 genworth.com 1970-01-01 2018-09-01 06:00:00 Genworth Financial 1 load 1.0 load 1970-01-01 00:00:00 -239 paypal.com 1970-01-01 2018-09-01 06:00:00 PayPal Holdings 1 load 1.0 load 1970-01-01 00:00:00 -240 pricelinegroup.com 1970-01-01 2018-09-01 06:00:00 Priceline Group 1 load 1.0 load 1970-01-01 00:00:00 -241 mgmresorts.com 1970-01-01 2018-09-01 06:00:00 MGM Resorts International 1 load 1.0 load 1970-01-01 00:00:00 -242 autoliv.com 1970-01-01 2018-09-01 06:00:00 Autoliv 1 load 1.0 load 1970-01-01 00:00:00 -243 valero.com 1970-01-01 2018-09-01 06:00:00 Valero Energy 1 load 1.0 load 1970-01-01 00:00:00 -244 fnf.com 1970-01-01 2018-09-01 06:00:00 Fidelity National Financial 1 load 1.0 load 1970-01-01 00:00:00 -245 republicservices.com 1970-01-01 2018-09-01 06:00:00 Republic Services 1 load 1.0 load 1970-01-01 00:00:00 -246 corning.com 1970-01-01 2018-09-01 06:00:00 Corning 1 load 1.0 load 1970-01-01 00:00:00 -247 kiewit.com 1970-01-01 2018-09-01 06:00:00 Peter Kiewit Sons 1 load 1.0 load 1970-01-01 00:00:00 -248 univar.com 1970-01-01 2018-09-01 06:00:00 Univar 1 load 1.0 load 1970-01-01 00:00:00 -249 mosaicco.com 1970-01-01 2018-09-01 06:00:00 Mosaic 1 load 1.0 load 1970-01-01 00:00:00 -250 core-mark.com 1970-01-01 2018-09-01 06:00:00 Core-Mark Holding 1 
load 1.0 load 1970-01-01 00:00:00 -251 thrivent.com 1970-01-01 2018-09-01 06:00:00 Thrivent Financial for Lutherans 1 load 1.0 load 1970-01-01 00:00:00 -252 c-a-m.com 1970-01-01 2018-09-01 06:00:00 Cameron International 1 load 1.0 load 1970-01-01 00:00:00 -253 hdsupply.com 1970-01-01 2018-09-01 06:00:00 HD Supply Holdings 1 load 1.0 load 1970-01-01 00:00:00 -254 antheminc.com 1970-01-01 2018-09-01 06:00:00 Anthem 1 load 1.0 load 1970-01-01 00:00:00 -255 crowncork.com 1970-01-01 2018-09-01 06:00:00 Crown Holdings 1 load 1.0 load 1970-01-01 00:00:00 -256 eogresources.com 1970-01-01 2018-09-01 06:00:00 EOG Resources 1 load 1.0 load 1970-01-01 00:00:00 -257 veritivcorp.com 1970-01-01 2018-09-01 06:00:00 Veritiv 1 load 1.0 load 1970-01-01 00:00:00 -258 anadarko.com 1970-01-01 2018-09-01 06:00:00 Anadarko Petroleum 1 load 1.0 load 1970-01-01 00:00:00 -259 labcorp.com 1970-01-01 2018-09-01 06:00:00 Laboratory Corp. of America 1 load 1.0 load 1970-01-01 00:00:00 -260 pacificlife.com 1970-01-01 2018-09-01 06:00:00 Pacific Life 1 load 1.0 load 1970-01-01 00:00:00 -261 newscorp.com 1970-01-01 2018-09-01 06:00:00 News Corp. 1 load 1.0 load 1970-01-01 00:00:00 -262 jarden.com 1970-01-01 2018-09-01 06:00:00 Jarden 1 load 1.0 load 1970-01-01 00:00:00 -263 suntrust.com 1970-01-01 2018-09-01 06:00:00 SunTrust Banks 1 load 1.0 load 1970-01-01 00:00:00 -264 avisbudgetgroup.com 1970-01-01 2018-09-01 06:00:00 Avis Budget Group 1 load 1.0 load 1970-01-01 00:00:00 -265 pg.com 1970-01-01 2018-09-01 06:00:00 Procter & Gamble 1 load 1.0 load 1970-01-01 00:00:00 -266 broadcom.com 1970-01-01 2018-09-01 06:00:00 Broadcom 1 load 1.0 load 1970-01-01 00:00:00 -267 amfam.com 1970-01-01 2018-09-01 06:00:00 American Family Insurance Group 1 load 1.0 load 1970-01-01 00:00:00 -268 level3.com 1970-01-01 2018-09-01 06:00:00 Level 3 Communications 1 load 1.0 load 1970-01-01 00:00:00 -269 tenneco.com 1970-01-01 2018-09-01 06:00:00 Tenneco 1 load 1.0 load 1970-01-01 00:00:00 -270 unfi.com 1970-01-01 2018-09-01 06:00:00 United Natural Foods 1 load 1.0 load 1970-01-01 00:00:00 -271 deanfoods.com 1970-01-01 2018-09-01 06:00:00 Dean Foods 1 load 1.0 load 1970-01-01 00:00:00 -272 campbellsoupcompany.com 1970-01-01 2018-09-01 06:00:00 Campbell Soup 1 load 1.0 load 1970-01-01 00:00:00 -273 mohawkind.com 1970-01-01 2018-09-01 06:00:00 Mohawk Industries 1 load 1.0 load 1970-01-01 00:00:00 -274 borgwarner.com 1970-01-01 2018-09-01 06:00:00 BorgWarner 1 load 1.0 load 1970-01-01 00:00:00 -275 pvh.com 1970-01-01 2018-09-01 06:00:00 PVH 1 load 1.0 load 1970-01-01 00:00:00 -276 statefarm.com 1970-01-01 2018-09-01 06:00:00 State Farm Insurance Cos. 
1 load 1.0 load 1970-01-01 00:00:00 -277 ball.com 1970-01-01 2018-09-01 06:00:00 Ball 1 load 1.0 load 1970-01-01 00:00:00 -278 oreillyauto.com 1970-01-01 2018-09-01 06:00:00 O Reilly Automotive 1 load 1.0 load 1970-01-01 00:00:00 -279 eversource.com 1970-01-01 2018-09-01 06:00:00 Eversource Energy 1 load 1.0 load 1970-01-01 00:00:00 -280 franklinresources.com 1970-01-01 2018-09-01 06:00:00 Franklin Resources 1 load 1.0 load 1970-01-01 00:00:00 -281 masco.com 1970-01-01 2018-09-01 06:00:00 Masco 1 load 1.0 load 1970-01-01 00:00:00 -282 lithia.com 1970-01-01 2018-09-01 06:00:00 Lithia Motors 1 load 1.0 load 1970-01-01 00:00:00 -283 kkr.com 1970-01-01 2018-09-01 06:00:00 KKR 1 load 1.0 load 1970-01-01 00:00:00 -284 oneok.com 1970-01-01 2018-09-01 06:00:00 Oneok 1 load 1.0 load 1970-01-01 00:00:00 -285 newmont.com 1970-01-01 2018-09-01 06:00:00 Newmont Mining 1 load 1.0 load 1970-01-01 00:00:00 -286 pplweb.com 1970-01-01 2018-09-01 06:00:00 PPL 1 load 1.0 load 1970-01-01 00:00:00 -287 google.com 1970-01-01 2018-09-01 06:00:00 Alphabet 1 load 1.0 load 1970-01-01 00:00:00 -288 spartannash.com 1970-01-01 2018-09-01 06:00:00 SpartanNash 1 load 1.0 load 1970-01-01 00:00:00 -289 quantaservices.com 1970-01-01 2018-09-01 06:00:00 Quanta Services 1 load 1.0 load 1970-01-01 00:00:00 -290 xpo.com 1970-01-01 2018-09-01 06:00:00 XPO Logistics 1 load 1.0 load 1970-01-01 00:00:00 -291 ralphlauren.com 1970-01-01 2018-09-01 06:00:00 Ralph Lauren 1 load 1.0 load 1970-01-01 00:00:00 -292 interpublic.com 1970-01-01 2018-09-01 06:00:00 Interpublic Group 1 load 1.0 load 1970-01-01 00:00:00 -293 steeldynamics.com 1970-01-01 2018-09-01 06:00:00 Steel Dynamics 1 load 1.0 load 1970-01-01 00:00:00 -294 wesco.com 1970-01-01 2018-09-01 06:00:00 WESCO International 1 load 1.0 load 1970-01-01 00:00:00 -295 questdiagnostics.com 1970-01-01 2018-09-01 06:00:00 Quest Diagnostics 1 load 1.0 load 1970-01-01 00:00:00 -296 bostonscientific.com 1970-01-01 2018-09-01 06:00:00 Boston Scientific 1 load 1.0 load 1970-01-01 00:00:00 -297 agcocorp.com 1970-01-01 2018-09-01 06:00:00 AGCO 1 load 1.0 load 1970-01-01 00:00:00 -298 comcastcorporation.com 1970-01-01 2018-09-01 06:00:00 Comcast 1 load 1.0 load 1970-01-01 00:00:00 -299 footlocker-inc.com 1970-01-01 2018-09-01 06:00:00 Foot Locker 1 load 1.0 load 1970-01-01 00:00:00 -300 thehersheycompany.com 1970-01-01 2018-09-01 06:00:00 Hershey 1 load 1.0 load 1970-01-01 00:00:00 -450 nvrinc.com 1970-01-01 2018-09-01 06:00:00 NVR 1 load 1.0 load 1970-01-01 00:00:00 -301 centerpointenergy.com 1970-01-01 2018-09-01 06:00:00 CenterPoint Energy 1 load 1.0 load 1970-01-01 00:00:00 -302 williams.com 1970-01-01 2018-09-01 06:00:00 Williams 1 load 1.0 load 1970-01-01 00:00:00 -303 dickssportinggoods.com 1970-01-01 2018-09-01 06:00:00 Dicks Sporting Goods 1 load 1.0 load 1970-01-01 00:00:00 -304 livenation.com 1970-01-01 2018-09-01 06:00:00 Live Nation Entertainment 1 load 1.0 load 1970-01-01 00:00:00 -305 mutualofomaha.com 1970-01-01 2018-09-01 06:00:00 Mutual of Omaha Insurance 1 load 1.0 load 1970-01-01 00:00:00 -306 wrberkley.com 1970-01-01 2018-09-01 06:00:00 W.R. 
Berkley 1 load 1.0 load 1970-01-01 00:00:00 -307 lkqcorp.com 1970-01-01 2018-09-01 06:00:00 LKQ 1 load 1.0 load 1970-01-01 00:00:00 -308 avoncompany.com 1970-01-01 2018-09-01 06:00:00 Avon Products 1 load 1.0 load 1970-01-01 00:00:00 -309 target.com 1970-01-01 2018-09-01 06:00:00 Target 1 load 1.0 load 1970-01-01 00:00:00 -310 darden.com 1970-01-01 2018-09-01 06:00:00 Darden Restaurants 1 load 1.0 load 1970-01-01 00:00:00 -311 kindredhealthcare.com 1970-01-01 2018-09-01 06:00:00 Kindred Healthcare 1 load 1.0 load 1970-01-01 00:00:00 -312 weyerhaeuser.com 1970-01-01 2018-09-01 06:00:00 Weyerhaeuser 1 load 1.0 load 1970-01-01 00:00:00 -313 caseys.com 1970-01-01 2018-09-01 06:00:00 Caseys General Stores 1 load 1.0 load 1970-01-01 00:00:00 -314 sealedair.com 1970-01-01 2018-09-01 06:00:00 Sealed Air 1 load 1.0 load 1970-01-01 00:00:00 -315 53.com 1970-01-01 2018-09-01 06:00:00 Fifth Third Bancorp 1 load 1.0 load 1970-01-01 00:00:00 -316 dovercorporation.com 1970-01-01 2018-09-01 06:00:00 Dover 1 load 1.0 load 1970-01-01 00:00:00 -317 huntingtoningalls.com 1970-01-01 2018-09-01 06:00:00 Huntington Ingalls Industries 1 load 1.0 load 1970-01-01 00:00:00 -318 netflix.com 1970-01-01 2018-09-01 06:00:00 Netflix 1 load 1.0 load 1970-01-01 00:00:00 -319 dillards.com 1970-01-01 2018-09-01 06:00:00 Dillards 1 load 1.0 load 1970-01-01 00:00:00 -320 jnj.com 1970-01-01 2018-09-01 06:00:00 Johnson & Johnson 1 load 1.0 load 1970-01-01 00:00:00 -321 emcorgroup.com 1970-01-01 2018-09-01 06:00:00 EMCOR Group 1 load 1.0 load 1970-01-01 00:00:00 -322 edwardjones.com 1970-01-01 2018-09-01 06:00:00 Jones Financial 1 load 1.0 load 1970-01-01 00:00:00 -323 aksteel.com 1970-01-01 2018-09-01 06:00:00 AK Steel Holding 1 load 1.0 load 1970-01-01 00:00:00 -324 ugicorp.com 1970-01-01 2018-09-01 06:00:00 UGI 1 load 1.0 load 1970-01-01 00:00:00 -325 expediainc.com 1970-01-01 2018-09-01 06:00:00 Expedia 1 load 1.0 load 1970-01-01 00:00:00 -326 salesforce.com 1970-01-01 2018-09-01 06:00:00 salesforce.com 1 load 1.0 load 1970-01-01 00:00:00 -327 targaresources.com 1970-01-01 2018-09-01 06:00:00 Targa Resources 1 load 1.0 load 1970-01-01 00:00:00 -328 apachecorp.com 1970-01-01 2018-09-01 06:00:00 Apache 1 load 1.0 load 1970-01-01 00:00:00 -329 spiritaero.com 1970-01-01 2018-09-01 06:00:00 Spirit AeroSystems Holdings 1 load 1.0 load 1970-01-01 00:00:00 -330 expeditors.com 1970-01-01 2018-09-01 06:00:00 Expeditors International of Washington 1 load 1.0 load 1970-01-01 00:00:00 -331 metlife.com 1970-01-01 2018-09-01 06:00:00 MetLife 1 load 1.0 load 1970-01-01 00:00:00 -332 anixter.com 1970-01-01 2018-09-01 06:00:00 Anixter International 1 load 1.0 load 1970-01-01 00:00:00 -333 fisglobal.com 1970-01-01 2018-09-01 06:00:00 Fidelity National Information Services 1 load 1.0 load 1970-01-01 00:00:00 -334 asburyauto.com 1970-01-01 2018-09-01 06:00:00 Asbury Automotive Group 1 load 1.0 load 1970-01-01 00:00:00 -335 hess.com 1970-01-01 2018-09-01 06:00:00 Hess 1 load 1.0 load 1970-01-01 00:00:00 -336 ryder.com 1970-01-01 2018-09-01 06:00:00 Ryder System 1 load 1.0 load 1970-01-01 00:00:00 -337 terex.com 1970-01-01 2018-09-01 06:00:00 Terex 1 load 1.0 load 1970-01-01 00:00:00 -338 cokecce.com 1970-01-01 2018-09-01 06:00:00 Coca-Cola European Partners 1 load 1.0 load 1970-01-01 00:00:00 -339 auto-owners.com 1970-01-01 2018-09-01 06:00:00 Auto-Owners Insurance 1 load 1.0 load 1970-01-01 00:00:00 -340 cablevision.com 1970-01-01 2018-09-01 06:00:00 Cablevision Systems 1 load 1.0 load 1970-01-01 00:00:00 -341 symantec.com 1970-01-01 2018-09-01 
06:00:00 Symantec 1 load 1.0 load 1970-01-01 00:00:00 -342 mckesson.com 1970-01-01 2018-09-01 06:00:00 McKesson 1 load 1.0 load 1970-01-01 00:00:00 -343 adm.com 1970-01-01 2018-09-01 06:00:00 Archer Daniels Midland 1 load 1.0 load 1970-01-01 00:00:00 -344 aboutschwab.com 1970-01-01 2018-09-01 06:00:00 Charles Schwab 1 load 1.0 load 1970-01-01 00:00:00 -345 calpine.com 1970-01-01 2018-09-01 06:00:00 Calpine 1 load 1.0 load 1970-01-01 00:00:00 -346 cmsenergy.com 1970-01-01 2018-09-01 06:00:00 CMS Energy 1 load 1.0 load 1970-01-01 00:00:00 -347 alliancedata.com 1970-01-01 2018-09-01 06:00:00 Alliance Data Systems 1 load 1.0 load 1970-01-01 00:00:00 -348 jetblue.com 1970-01-01 2018-09-01 06:00:00 JetBlue Airways 1 load 1.0 load 1970-01-01 00:00:00 -349 discoverycommunications.com 1970-01-01 2018-09-01 06:00:00 Discovery Communications 1 load 1.0 load 1970-01-01 00:00:00 -350 trin.net 1970-01-01 2018-09-01 06:00:00 Trinity Industries 1 load 1.0 load 1970-01-01 00:00:00 -351 sanmina.com 1970-01-01 2018-09-01 06:00:00 Sanmina 1 load 1.0 load 1970-01-01 00:00:00 -352 ncr.com 1970-01-01 2018-09-01 06:00:00 NCR 1 load 1.0 load 1970-01-01 00:00:00 -353 fmctechnologies.com 1970-01-01 2018-09-01 06:00:00 FMC Technologies 1 load 1.0 load 1970-01-01 00:00:00 -354 marathonpetroleum.com 1970-01-01 2018-09-01 06:00:00 Marathon Petroleum 1 load 1.0 load 1970-01-01 00:00:00 -355 erieinsurance.com 1970-01-01 2018-09-01 06:00:00 Erie Insurance Group 1 load 1.0 load 1970-01-01 00:00:00 -356 rockwellautomation.com 1970-01-01 2018-09-01 06:00:00 Rockwell Automation 1 load 1.0 load 1970-01-01 00:00:00 -357 drpeppersnapplegroup.com 1970-01-01 2018-09-01 06:00:00 Dr Pepper Snapple Group 1 load 1.0 load 1970-01-01 00:00:00 -358 iheartmedia.com 1970-01-01 2018-09-01 06:00:00 iHeartMedia 1 load 1.0 load 1970-01-01 00:00:00 -359 tractorsupply.com 1970-01-01 2018-09-01 06:00:00 Tractor Supply 1 load 1.0 load 1970-01-01 00:00:00 -360 jbhunt.com 1970-01-01 2018-09-01 06:00:00 J.B. 
Hunt Transport Services 1 load 1.0 load 1970-01-01 00:00:00 -361 cmc.com 1970-01-01 2018-09-01 06:00:00 Commercial Metals 1 load 1.0 load 1970-01-01 00:00:00 -362 o-i.com 1970-01-01 2018-09-01 06:00:00 Owens-Illinois 1 load 1.0 load 1970-01-01 00:00:00 -363 harman.com 1970-01-01 2018-09-01 06:00:00 Harman International Industries 1 load 1.0 load 1970-01-01 00:00:00 -364 baxalta.com 1970-01-01 2018-09-01 06:00:00 Baxalta 1 load 1.0 load 1970-01-01 00:00:00 -365 freddiemac.com 1970-01-01 2018-09-01 06:00:00 Freddie Mac 1 load 1.0 load 1970-01-01 00:00:00 -366 afginc.com 1970-01-01 2018-09-01 06:00:00 American Financial Group 1 load 1.0 load 1970-01-01 00:00:00 -367 netapp.com 1970-01-01 2018-09-01 06:00:00 NetApp 1 load 1.0 load 1970-01-01 00:00:00 -368 graybar.com 1970-01-01 2018-09-01 06:00:00 Graybar Electric 1 load 1.0 load 1970-01-01 00:00:00 -369 oshkoshcorporation.com 1970-01-01 2018-09-01 06:00:00 Oshkosh 1 load 1.0 load 1970-01-01 00:00:00 -370 ameren.com 1970-01-01 2018-09-01 06:00:00 Ameren 1 load 1.0 load 1970-01-01 00:00:00 -371 amark.com 1970-01-01 2018-09-01 06:00:00 A-Mark Precious Metals 1 load 1.0 load 1970-01-01 00:00:00 -372 barnesandnobleinc.com 1970-01-01 2018-09-01 06:00:00 Barnes & Noble 1 load 1.0 load 1970-01-01 00:00:00 -373 dana.com 1970-01-01 2018-09-01 06:00:00 Dana Holding 1 load 1.0 load 1970-01-01 00:00:00 -374 cbrands.com 1970-01-01 2018-09-01 06:00:00 Constellation Brands 1 load 1.0 load 1970-01-01 00:00:00 -375 lifepointhealth.net 1970-01-01 2018-09-01 06:00:00 LifePoint Health 1 load 1.0 load 1970-01-01 00:00:00 -376 pepsico.com 1970-01-01 2018-09-01 06:00:00 PepsiCo 1 load 1.0 load 1970-01-01 00:00:00 -377 zimmerbiomet.com 1970-01-01 2018-09-01 06:00:00 Zimmer Biomet Holdings 1 load 1.0 load 1970-01-01 00:00:00 -378 harley-davidson.com 1970-01-01 2018-09-01 06:00:00 Harley-Davidson 1 load 1.0 load 1970-01-01 00:00:00 -379 pultegroupinc.com 1970-01-01 2018-09-01 06:00:00 PulteGroup 1 load 1.0 load 1970-01-01 00:00:00 -380 newellbrands.com 1970-01-01 2018-09-01 06:00:00 Newell Brands 1 load 1.0 load 1970-01-01 00:00:00 -381 averydennison.com 1970-01-01 2018-09-01 06:00:00 Avery Dennison 1 load 1.0 load 1970-01-01 00:00:00 -382 jll.com 1970-01-01 2018-09-01 06:00:00 Jones Lang LaSalle 1 load 1.0 load 1970-01-01 00:00:00 -383 wecenergygroup.com 1970-01-01 2018-09-01 06:00:00 WEC Energy Group 1 load 1.0 load 1970-01-01 00:00:00 -384 marathonoil.com 1970-01-01 2018-09-01 06:00:00 Marathon Oil 1 load 1.0 load 1970-01-01 00:00:00 -385 ta-petrol.com 1970-01-01 2018-09-01 06:00:00 TravelCenters of America 1 load 1.0 load 1970-01-01 00:00:00 -386 unitedrentals.com 1970-01-01 2018-09-01 06:00:00 United Rentals 1 load 1.0 load 1970-01-01 00:00:00 -387 utc.com 1970-01-01 2018-09-01 06:00:00 United Technologies 1 load 1.0 load 1970-01-01 00:00:00 -388 hrggroup.com 1970-01-01 2018-09-01 06:00:00 HRG Group 1 load 1.0 load 1970-01-01 00:00:00 -389 oldrepublic.com 1970-01-01 2018-09-01 06:00:00 Old Republic International 1 load 1.0 load 1970-01-01 00:00:00 -390 windstream.com 1970-01-01 2018-09-01 06:00:00 Windstream Holdings 1 load 1.0 load 1970-01-01 00:00:00 -391 starwoodhotels.com 1970-01-01 2018-09-01 06:00:00 Starwood Hotels & Resorts 1 load 1.0 load 1970-01-01 00:00:00 -392 delekus.com 1970-01-01 2018-09-01 06:00:00 Delek US Holdings 1 load 1.0 load 1970-01-01 00:00:00 -393 packagingcorp.com 1970-01-01 2018-09-01 06:00:00 Packaging Corp. 
of America 1 load 1.0 load 1970-01-01 00:00:00 -394 quintiles.com 1970-01-01 2018-09-01 06:00:00 Quintiles IMS Holdings 1 load 1.0 load 1970-01-01 00:00:00 -395 hanes.com 1970-01-01 2018-09-01 06:00:00 Hanesbrands 1 load 1.0 load 1970-01-01 00:00:00 -396 realogy.com 1970-01-01 2018-09-01 06:00:00 Realogy Holdings 1 load 1.0 load 1970-01-01 00:00:00 -397 mattel.com 1970-01-01 2018-09-01 06:00:00 Mattel 1 load 1.0 load 1970-01-01 00:00:00 -398 aetna.com 1970-01-01 2018-09-01 06:00:00 Aetna 1 load 1.0 load 1970-01-01 00:00:00 -399 motorolasolutions.com 1970-01-01 2018-09-01 06:00:00 Motorola Solutions 1 load 1.0 load 1970-01-01 00:00:00 -400 jmsmucker.com 1970-01-01 2018-09-01 06:00:00 J.M. Smucker 1 load 1.0 load 1970-01-01 00:00:00 -401 regions.com 1970-01-01 2018-09-01 06:00:00 Regions Financial 1 load 1.0 load 1970-01-01 00:00:00 -402 celanese.com 1970-01-01 2018-09-01 06:00:00 Celanese 1 load 1.0 load 1970-01-01 00:00:00 -403 thecloroxcompany.com 1970-01-01 2018-09-01 06:00:00 Clorox 1 load 1.0 load 1970-01-01 00:00:00 -404 ingredion.com 1970-01-01 2018-09-01 06:00:00 Ingredion 1 load 1.0 load 1970-01-01 00:00:00 -405 genesishcc.com 1970-01-01 2018-09-01 06:00:00 Genesis Healthcare 1 load 1.0 load 1970-01-01 00:00:00 -406 peabodyenergy.com 1970-01-01 2018-09-01 06:00:00 Peabody Energy 1 load 1.0 load 1970-01-01 00:00:00 -407 alaskaair.com 1970-01-01 2018-09-01 06:00:00 Alaska Air Group 1 load 1.0 load 1970-01-01 00:00:00 -408 seaboardcorp.com 1970-01-01 2018-09-01 06:00:00 Seaboard 1 load 1.0 load 1970-01-01 00:00:00 -409 lowes.com 1970-01-01 2018-09-01 06:00:00 Lowes 1 load 1.0 load 1970-01-01 00:00:00 -410 frontier.com 1970-01-01 2018-09-01 06:00:00 Frontier Communications 1 load 1.0 load 1970-01-01 00:00:00 -411 amphenol.com 1970-01-01 2018-09-01 06:00:00 Amphenol 1 load 1.0 load 1970-01-01 00:00:00 -412 lansingtradegroup.com 1970-01-01 2018-09-01 06:00:00 Lansing Trade Group 1 load 1.0 load 1970-01-01 00:00:00 -413 sandisk.com 1970-01-01 2018-09-01 06:00:00 SanDisk 1 load 1.0 load 1970-01-01 00:00:00 -414 sjm.com 1970-01-01 2018-09-01 06:00:00 St. Jude Medical 1 load 1.0 load 1970-01-01 00:00:00 -415 wyndhamworldwide.com 1970-01-01 2018-09-01 06:00:00 Wyndham Worldwide 1 load 1.0 load 1970-01-01 00:00:00 -416 kellyservices.com 1970-01-01 2018-09-01 06:00:00 Kelly Services 1 load 1.0 load 1970-01-01 00:00:00 -417 westernunion.com 1970-01-01 2018-09-01 06:00:00 Western Union 1 load 1.0 load 1970-01-01 00:00:00 -418 evhc.net 1970-01-01 2018-09-01 06:00:00 Envision Healthcare Holdings 1 load 1.0 load 1970-01-01 00:00:00 -419 visteon.com 1970-01-01 2018-09-01 06:00:00 Visteon 1 load 1.0 load 1970-01-01 00:00:00 -420 ups.com 1970-01-01 2018-09-01 06:00:00 UPS 1 load 1.0 load 1970-01-01 00:00:00 -421 ajg.com 1970-01-01 2018-09-01 06:00:00 Arthur J. 
Gallagher 1 load 1.0 load 1970-01-01 00:00:00 -422 hosthotels.com 1970-01-01 2018-09-01 06:00:00 Host Hotels & Resorts 1 load 1.0 load 1970-01-01 00:00:00 -423 ashland.com 1970-01-01 2018-09-01 06:00:00 Ashland 1 load 1.0 load 1970-01-01 00:00:00 -424 insight.com 1970-01-01 2018-09-01 06:00:00 Insight Enterprises 1 load 1.0 load 1970-01-01 00:00:00 -425 energyfutureholdings.com 1970-01-01 2018-09-01 06:00:00 Energy Future Holdings 1 load 1.0 load 1970-01-01 00:00:00 -426 markelcorp.com 1970-01-01 2018-09-01 06:00:00 Markel 1 load 1.0 load 1970-01-01 00:00:00 -427 essendant.com 1970-01-01 2018-09-01 06:00:00 Essendant 1 load 1.0 load 1970-01-01 00:00:00 -428 ch2m.com 1970-01-01 2018-09-01 06:00:00 CH2M Hill 1 load 1.0 load 1970-01-01 00:00:00 -429 westernsouthern.com 1970-01-01 2018-09-01 06:00:00 Western & Southern Financial Group 1 load 1.0 load 1970-01-01 00:00:00 -430 owenscorning.com 1970-01-01 2018-09-01 06:00:00 Owens Corning 1 load 1.0 load 1970-01-01 00:00:00 -431 aig.com 1970-01-01 2018-09-01 06:00:00 AIG 1 load 1.0 load 1970-01-01 00:00:00 -432 spglobal.com 1970-01-01 2018-09-01 06:00:00 S&P Global 1 load 1.0 load 1970-01-01 00:00:00 -433 raymondjames.com 1970-01-01 2018-09-01 06:00:00 Raymond James Financial 1 load 1.0 load 1970-01-01 00:00:00 -434 nisource.com 1970-01-01 2018-09-01 06:00:00 NiSource 1 load 1.0 load 1970-01-01 00:00:00 -435 airgas.com 1970-01-01 2018-09-01 06:00:00 Airgas 1 load 1.0 load 1970-01-01 00:00:00 -436 abm.com 1970-01-01 2018-09-01 06:00:00 ABM Industries 1 load 1.0 load 1970-01-01 00:00:00 -437 citizensbank.com 1970-01-01 2018-09-01 06:00:00 Citizens Financial Group 1 load 1.0 load 1970-01-01 00:00:00 -438 boozallen.com 1970-01-01 2018-09-01 06:00:00 Booz Allen Hamilton Holding 1 load 1.0 load 1970-01-01 00:00:00 -439 simon.com 1970-01-01 2018-09-01 06:00:00 Simon Property Group 1 load 1.0 load 1970-01-01 00:00:00 -440 domtar.com 1970-01-01 2018-09-01 06:00:00 Domtar 1 load 1.0 load 1970-01-01 00:00:00 -441 rockwellcollins.com 1970-01-01 2018-09-01 06:00:00 Rockwell Collins 1 load 1.0 load 1970-01-01 00:00:00 -442 prudential.com 1970-01-01 2018-09-01 06:00:00 Prudential Financial 1 load 1.0 load 1970-01-01 00:00:00 -443 lamresearch.com 1970-01-01 2018-09-01 06:00:00 Lam Research 1 load 1.0 load 1970-01-01 00:00:00 -444 fiserv.com 1970-01-01 2018-09-01 06:00:00 Fiserv 1 load 1.0 load 1970-01-01 00:00:00 -445 spectraenergy.com 1970-01-01 2018-09-01 06:00:00 Spectra Energy 1 load 1.0 load 1970-01-01 00:00:00 -446 navient.com 1970-01-01 2018-09-01 06:00:00 Navient 1 load 1.0 load 1970-01-01 00:00:00 -447 biglots.com 1970-01-01 2018-09-01 06:00:00 Big Lots 1 load 1.0 load 1970-01-01 00:00:00 -448 tdsinc.com 1970-01-01 2018-09-01 06:00:00 Telephone & Data Systems 1 load 1.0 load 1970-01-01 00:00:00 -449 firstam.com 1970-01-01 2018-09-01 06:00:00 First American Financial 1 load 1.0 load 1970-01-01 00:00:00 -451 cinfin.com 1970-01-01 2018-09-01 06:00:00 Cincinnati Financial 1 load 1.0 load 1970-01-01 00:00:00 -452 burlingtonstores.com 1970-01-01 2018-09-01 06:00:00 Burlington Stores 1 load 1.0 load 1970-01-01 00:00:00 -453 unitedhealthgroup.com 1970-01-01 2018-09-01 06:00:00 UnitedHealth Group 1 load 1.0 load 1970-01-01 00:00:00 -454 intel.com 1970-01-01 2018-09-01 06:00:00 Intel 1 load 1.0 load 1970-01-01 00:00:00 -455 humana.com 1970-01-01 2018-09-01 06:00:00 Humana 1 load 1.0 load 1970-01-01 00:00:00 -456 disney.com 1970-01-01 2018-09-01 06:00:00 Disney 1 load 1.0 load 1970-01-01 00:00:00 -457 cisco.com 1970-01-01 2018-09-01 06:00:00 Cisco Systems 1 load 
1.0 load 1970-01-01 00:00:00 -458 pfizer.com 1970-01-01 2018-09-01 06:00:00 Pfizer 1 load 1.0 load 1970-01-01 00:00:00 -459 dow.com 1970-01-01 2018-09-01 06:00:00 Dow Chemical 1 load 1.0 load 1970-01-01 00:00:00 -460 sysco.com 1970-01-01 2018-09-01 06:00:00 Sysco 1 load 1.0 load 1970-01-01 00:00:00 -461 fedex.com 1970-01-01 2018-09-01 06:00:00 FedEx 1 load 1.0 load 1970-01-01 00:00:00 -462 caterpillar.com 1970-01-01 2018-09-01 06:00:00 Caterpillar 1 load 1.0 load 1970-01-01 00:00:00 -463 lockheedmartin.com 1970-01-01 2018-09-01 06:00:00 Lockheed Martin 1 load 1.0 load 1970-01-01 00:00:00 -464 cvshealth.com 1970-01-01 2018-09-01 06:00:00 CVS Health 1 load 1.0 load 1970-01-01 00:00:00 -465 newyorklife.com 1970-01-01 2018-09-01 06:00:00 New York Life Insurance 1 load 1.0 load 1970-01-01 00:00:00 -466 coca-colacompany.com 1970-01-01 2018-09-01 06:00:00 Coca-Cola 1 load 1.0 load 1970-01-01 00:00:00 -467 hcahealthcare.com 1970-01-01 2018-09-01 06:00:00 HCA Holdings 1 load 1.0 load 1970-01-01 00:00:00 -468 ingrammicro.com 1970-01-01 2018-09-01 06:00:00 Ingram Micro 1 load 1.0 load 1970-01-01 00:00:00 -469 energytransfer.com 1970-01-01 2018-09-01 06:00:00 Energy Transfer Equity 1 load 1.0 load 1970-01-01 00:00:00 -470 tysonfoods.com 1970-01-01 2018-09-01 06:00:00 Tyson Foods 1 load 1.0 load 1970-01-01 00:00:00 -471 aa.com 1970-01-01 2018-09-01 06:00:00 American Airlines Group 1 load 1.0 load 1970-01-01 00:00:00 -472 delta.com 1970-01-01 2018-09-01 06:00:00 Delta Air Lines 1 load 1.0 load 1970-01-01 00:00:00 -473 nationwide.com 1970-01-01 2018-09-01 06:00:00 Nationwide 1 load 1.0 load 1970-01-01 00:00:00 -474 johnsoncontrols.com 1970-01-01 2018-09-01 06:00:00 Johnson Controls 1 load 1.0 load 1970-01-01 00:00:00 -475 gm.com 1970-01-01 2018-09-01 06:00:00 General Motors 1 load 1.0 load 1970-01-01 00:00:00 -476 bestbuy.com 1970-01-01 2018-09-01 06:00:00 Best Buy 1 load 1.0 load 1970-01-01 00:00:00 -477 merck.com 1970-01-01 2018-09-01 06:00:00 Merck 1 load 1.0 load 1970-01-01 00:00:00 -478 libertymutual.com 1970-01-01 2018-09-01 06:00:00 Liberty Mutual Insurance Group 1 load 1.0 load 1970-01-01 00:00:00 -479 gs.com 1970-01-01 2018-09-01 06:00:00 Goldman Sachs Group 1 load 1.0 load 1970-01-01 00:00:00 -480 honeywell.com 1970-01-01 2018-09-01 06:00:00 Honeywell International 1 load 1.0 load 1970-01-01 00:00:00 -481 massmutual.com 1970-01-01 2018-09-01 06:00:00 Massachusetts Mutual Life Insurance 1 load 1.0 load 1970-01-01 00:00:00 -482 oracle.com 1970-01-01 2018-09-01 06:00:00 Oracle 1 load 1.0 load 1970-01-01 00:00:00 -483 morganstanley.com 1970-01-01 2018-09-01 06:00:00 Morgan Stanley 1 load 1.0 load 1970-01-01 00:00:00 -484 cigna.com 1970-01-01 2018-09-01 06:00:00 Cigna 1 load 1.0 load 1970-01-01 00:00:00 -485 unitedcontinentalholdings.com 1970-01-01 2018-09-01 06:00:00 United Continental Holdings 1 load 1.0 load 1970-01-01 00:00:00 -486 ford.com 1970-01-01 2018-09-01 06:00:00 Ford Motor 1 load 1.0 load 1970-01-01 00:00:00 -487 allstate.com 1970-01-01 2018-09-01 06:00:00 Allstate 1 load 1.0 load 1970-01-01 00:00:00 -488 tiaa.org 1970-01-01 2018-09-01 06:00:00 TIAA 1 load 1.0 load 1970-01-01 00:00:00 -489 intlfcstone.com 1970-01-01 2018-09-01 06:00:00 INTL FCStone 1 load 1.0 load 1970-01-01 00:00:00 -490 chsinc.com 1970-01-01 2018-09-01 06:00:00 CHS 1 load 1.0 load 1970-01-01 00:00:00 -491 americanexpress.com 1970-01-01 2018-09-01 06:00:00 American Express 1 load 1.0 load 1970-01-01 00:00:00 -492 gilead.com 1970-01-01 2018-09-01 06:00:00 Gilead Sciences 1 load 1.0 load 1970-01-01 00:00:00 -493 
publix.com 1970-01-01 2018-09-01 06:00:00 Publix Super Markets 1 load 1.0 load 1970-01-01 00:00:00 -494 generaldynamics.com 1970-01-01 2018-09-01 06:00:00 General Dynamics 1 load 1.0 load 1970-01-01 00:00:00 -495 tjx.com 1970-01-01 2018-09-01 06:00:00 TJX 1 load 1.0 load 1970-01-01 00:00:00 -496 conocophillips.com 1970-01-01 2018-09-01 06:00:00 ConocoPhillips 1 load 1.0 load 1970-01-01 00:00:00 -497 att.com 1970-01-01 2018-09-01 06:00:00 AT&T 1 load 1.0 load 1970-01-01 00:00:00 -498 nike.com 1970-01-01 2018-09-01 06:00:00 Nike 1 load 1.0 load 1970-01-01 00:00:00 -499 wfscorp.com 1970-01-01 2018-09-01 06:00:00 World Fuel Services 1 load 1.0 load 1970-01-01 00:00:00 -500 3m.com 1970-01-01 2018-09-01 06:00:00 3M 1 load 1.0 load 1970-01-01 00:00:00 -501 mondelezinternational.com 1970-01-01 2018-09-01 06:00:00 Mondelez International 1 load 1.0 load 1970-01-01 00:00:00 -502 exeloncorp.com 1970-01-01 2018-09-01 06:00:00 Exelon 1 load 1.0 load 1970-01-01 00:00:00 -503 21cf.com 1970-01-01 2018-09-01 06:00:00 Twenty-First Century Fox 1 load 1.0 load 1970-01-01 00:00:00 -504 johndeere.com 1970-01-01 2018-09-01 06:00:00 Deere 1 load 1.0 load 1970-01-01 00:00:00 -505 tsocorp.com 1970-01-01 2018-09-01 06:00:00 Tesoro 1 load 1.0 load 1970-01-01 00:00:00 -506 timewarner.com 1970-01-01 2018-09-01 06:00:00 Time Warner 1 load 1.0 load 1970-01-01 00:00:00 -507 redhat.com 1970-01-01 2018-09-01 06:00:00 Red Hat 1 load 1.0 load 1970-01-01 00:00:00 -509 openwrt.org 1970-01-01 2018-09-01 06:00:00 OpenWRT 1 load 1.0 load 1970-01-01 00:00:00 -510 panasonic.com 1970-01-01 2018-09-01 06:00:00 Panasonic 1 load 1.0 load 1970-01-01 00:00:00 -511 comcast.net 1970-01-01 2018-09-01 06:00:00 Comcast 1 load 1.0 load 1970-01-01 00:00:00 -512 linux.org 1970-01-01 2018-09-01 06:00:00 Linux 1 load 1.0 load 1970-01-01 00:00:00 -514 northwesternmutual.com 1970-01-01 2018-09-01 06:00:00 Northwestern Mutual 1 load 1.0 load 1970-01-01 00:00:00 -515 kde.org 1970-01-01 2018-09-01 06:00:00 KDE 1 load 1.0 load 1970-01-01 00:00:00 -516 twitter.com 1970-01-01 2018-09-01 06:00:00 Twitter 1 load 1.0 load 1970-01-01 00:00:00 -517 adobe.com 1970-01-01 2018-09-01 06:00:00 Adobe 1 load 1.0 load 1970-01-01 00:00:00 -519 acm.org 1970-01-01 2018-09-12 02:01:59 ACM 1 load 1.0 load 1970-01-01 00:00:00 -520 outdoors@acm.org 1970-01-01 2018-09-12 02:32:53 University of Missouri 1 load 1.0 load 2013-07-15 00:00:00 -521 freebsd.org 1970-01-01 2018-09-13 21:15:22 Free BSD 1 load 1.0 load 1970-01-01 00:00:00 -\. - - --- --- Data for Name: contributor_repo; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.contributor_repo (cntrb_repo_id, repo_git, repo_name, gh_repo_id, cntrb_category, event_id, created_at, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. 
- - --- --- Data for Name: contributors; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.contributors (cntrb_login, cntrb_email, cntrb_full_name, cntrb_company, cntrb_created_at, cntrb_type, cntrb_fake, cntrb_deleted, cntrb_long, cntrb_lat, cntrb_country_code, cntrb_state, cntrb_city, cntrb_location, cntrb_canonical, cntrb_last_used, gh_user_id, gh_login, gh_url, gh_html_url, gh_node_id, gh_avatar_url, gh_gravatar_id, gh_followers_url, gh_following_url, gh_gists_url, gh_starred_url, gh_subscriptions_url, gh_organizations_url, gh_repos_url, gh_events_url, gh_received_events_url, gh_type, gh_site_admin, gl_web_url, gl_avatar_url, gl_state, gl_username, gl_full_name, gl_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -not-provided \N \N \N 2019-06-13 11:33:39 \N 0 0 \N \N \N \N \N \N \N \N 1 nobody http://fake.me http://fake.me x http://fake.me \N http://fake.me http://fake.me http://fake.me http://fake.me http://fake.me http://fake.me http://fake.me http://fake.me \N \N \N \N \N \N \N \N \N \N \N \N 2019-06-13 16:35:25 00000000-0000-0000-0000-000000000000 -nan kannayoshihiro@gmail.com KANNA Yoshihiro UTMC 2009-04-17 12:43:58 \N 0 0 \N \N \N \N \N \N kannayoshihiro@gmail.com 2021-01-28 21:56:10-06 74832 nan https://api.github.com/users/nan https://github.com/nan MDQ6VXNlcjc0ODMy https://avatars.githubusercontent.com/u/74832?v=4 https://api.github.com/users/nan/followers https://api.github.com/users/nan/following{/other_user} https://api.github.com/users/nan/gists{/gist_id} https://api.github.com/users/nan/starred{/owner}{/repo} https://api.github.com/users/nan/subscriptions https://api.github.com/users/nan/orgs https://api.github.com/users/nan/repos https://api.github.com/users/nan/events{/privacy} https://api.github.com/users/nan/received_events User false \N \N \N \N \N \N GitHub API Worker 1.0.0 GitHub API 2021-10-28 15:23:46 01000000-0000-0000-0000-000000000000 -\. - - --- --- Data for Name: contributors_aliases; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.contributors_aliases (cntrb_alias_id, canonical_email, alias_email, cntrb_active, cntrb_last_modified, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: discourse_insights; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.discourse_insights (msg_discourse_id, msg_id, discourse_act, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: dm_repo_annual; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_annual (repo_id, email, affiliation, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: dm_repo_group_annual; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_group_annual (repo_group_id, email, affiliation, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: dm_repo_group_monthly; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_group_monthly (repo_group_id, email, affiliation, month, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. 
- - --- --- Data for Name: dm_repo_group_weekly; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_group_weekly (repo_group_id, email, affiliation, week, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: dm_repo_monthly; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_monthly (repo_id, email, affiliation, month, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: dm_repo_weekly; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_weekly (repo_id, email, affiliation, week, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: exclude; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.exclude (id, projects_id, email, domain) FROM stdin; -\. - - --- --- Data for Name: issue_assignees; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.issue_assignees (issue_assignee_id, issue_id, repo_id, issue_assignee_src_id, issue_assignee_src_node, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: issue_events; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.issue_events (event_id, issue_id, repo_id, action, action_commit_hash, created_at, node_id, node_url, platform_id, issue_event_src_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: issue_labels; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.issue_labels (issue_label_id, issue_id, repo_id, label_text, label_description, label_color, label_src_id, label_src_node_id, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: issue_message_ref; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.issue_message_ref (issue_msg_ref_id, issue_id, repo_id, msg_id, issue_msg_ref_src_node_id, issue_msg_ref_src_comment_id, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: issues; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.issues (issue_id, repo_id, pull_request, pull_request_id, created_at, issue_title, issue_body, comment_count, updated_at, closed_at, due_on, repository_url, issue_url, labels_url, comments_url, events_url, html_url, issue_state, issue_node_id, gh_issue_number, gh_issue_id, gh_user_id, tool_source, tool_version, data_source, data_collection_date, reporter_id, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: libraries; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.libraries (library_id, repo_id, platform, name, created_timestamp, updated_timestamp, library_description, keywords, library_homepage, license, version_count, latest_release_timestamp, latest_release_number, package_manager_id, dependency_count, dependent_library_count, primary_language, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. 
- - --- --- Data for Name: library_dependencies; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.library_dependencies (lib_dependency_id, library_id, manifest_platform, manifest_filepath, manifest_kind, repo_id_branch, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: library_version; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.library_version (library_version_id, library_id, library_platform, version_number, version_release_date, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: lstm_anomaly_models; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.lstm_anomaly_models (model_id, model_name, model_description, look_back_days, training_days, batch_size, metric, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: lstm_anomaly_results; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.lstm_anomaly_results (result_id, repo_id, repo_category, model_id, metric, contamination_factor, mean_absolute_error, remarks, metric_field, mean_absolute_actual_value, mean_absolute_prediction_value, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: message; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.message (msg_id, rgls_id, platform_msg_id, platform_node_id, repo_id, msg_text, msg_timestamp, msg_sender_email, msg_header, pltfrm_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: message_analysis; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.message_analysis (msg_analysis_id, msg_id, worker_run_id, sentiment_score, reconstruction_error, novelty_flag, feedback_flag, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: message_analysis_summary; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.message_analysis_summary (msg_summary_id, repo_id, worker_run_id, positive_ratio, negative_ratio, novel_count, period, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: message_sentiment; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.message_sentiment (msg_analysis_id, msg_id, worker_run_id, sentiment_score, reconstruction_error, novelty_flag, feedback_flag, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: message_sentiment_summary; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.message_sentiment_summary (msg_summary_id, repo_id, worker_run_id, positive_ratio, negative_ratio, novel_count, period, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: platform; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.platform (pltfrm_id, pltfrm_name, pltfrm_version, pltfrm_release_date, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -1 GitHub 3 2019-06-05 Manual Entry Sean Goggins GitHub 2019-06-05 17:23:42 -0 Unresolved 0 2019-06-05 Manual Entry Sean Goggins GitHub 2022-07-28 20:43:00 -2 GitLab 2 2019-06-05 Manual Entry Sean Goggins GitHub 2022-07-28 20:43:00 -\. 
- - --- --- Data for Name: pull_request_analysis; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_analysis (pull_request_analysis_id, pull_request_id, merge_probability, mechanism, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_request_assignees; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_assignees (pr_assignee_map_id, pull_request_id, repo_id, pr_assignee_src_id, tool_source, tool_version, data_source, data_collection_date, contrib_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_commits; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_commits (pr_cmt_id, pull_request_id, repo_id, pr_cmt_sha, pr_cmt_node_id, pr_cmt_message, pr_cmt_comments_url, pr_cmt_timestamp, pr_cmt_author_email, tool_source, tool_version, data_source, data_collection_date, pr_cmt_author_cntrb_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_events; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_events (pr_event_id, pull_request_id, repo_id, action, action_commit_hash, created_at, issue_event_src_id, node_id, node_url, platform_id, pr_platform_event_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_files; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_files (pr_file_id, pull_request_id, repo_id, pr_file_additions, pr_file_deletions, pr_file_path, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_request_labels; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_labels (pr_label_id, pull_request_id, repo_id, pr_src_id, pr_src_node_id, pr_src_url, pr_src_description, pr_src_color, pr_src_default_bool, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_request_message_ref; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_message_ref (pr_msg_ref_id, pull_request_id, repo_id, msg_id, pr_message_ref_src_comment_id, pr_message_ref_src_node_id, pr_issue_url, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_request_meta; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_meta (pr_repo_meta_id, pull_request_id, repo_id, pr_head_or_base, pr_src_meta_label, pr_src_meta_ref, pr_sha, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_repo; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_repo (pr_repo_id, pr_repo_meta_id, pr_repo_head_or_base, pr_src_repo_id, pr_src_node_id, pr_repo_name, pr_repo_full_name, pr_repo_private_bool, tool_source, tool_version, data_source, data_collection_date, pr_cntrb_id) FROM stdin; -\. 
- - --- --- Data for Name: pull_request_review_message_ref; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_review_message_ref (pr_review_msg_ref_id, pr_review_id, repo_id, msg_id, pr_review_msg_url, pr_review_src_id, pr_review_msg_src_id, pr_review_msg_node_id, pr_review_msg_diff_hunk, pr_review_msg_path, pr_review_msg_position, pr_review_msg_original_position, pr_review_msg_commit_id, pr_review_msg_original_commit_id, pr_review_msg_updated_at, pr_review_msg_html_url, pr_url, pr_review_msg_author_association, pr_review_msg_start_line, pr_review_msg_original_start_line, pr_review_msg_start_side, pr_review_msg_line, pr_review_msg_original_line, pr_review_msg_side, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_request_reviewers; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_reviewers (pr_reviewer_map_id, pull_request_id, pr_source_id, repo_id, pr_reviewer_src_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_reviews; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_reviews (pr_review_id, pull_request_id, repo_id, pr_review_author_association, pr_review_state, pr_review_body, pr_review_submitted_at, pr_review_src_id, pr_review_node_id, pr_review_html_url, pr_review_pull_request_url, pr_review_commit_id, platform_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_teams; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_teams (pr_team_id, pull_request_id, pr_src_team_id, pr_src_team_node, pr_src_team_url, pr_team_name, pr_team_slug, pr_team_description, pr_team_privacy, pr_team_permission, pr_team_src_members_url, pr_team_src_repositories_url, pr_team_parent_id, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_requests; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_requests (pull_request_id, repo_id, pr_url, pr_src_id, pr_src_node_id, pr_html_url, pr_diff_url, pr_patch_url, pr_issue_url, pr_augur_issue_id, pr_src_number, pr_src_state, pr_src_locked, pr_src_title, pr_body, pr_created_at, pr_updated_at, pr_closed_at, pr_merged_at, pr_merge_commit_sha, pr_teams, pr_milestone, pr_commits_url, pr_review_comments_url, pr_review_comment_url, pr_comments_url, pr_statuses_url, pr_meta_head_id, pr_meta_base_id, pr_src_issue_url, pr_src_comments_url, pr_src_review_comments_url, pr_src_commits_url, pr_src_statuses_url, pr_src_author_association, tool_source, tool_version, data_source, data_collection_date, pr_augur_contributor_id) FROM stdin; -\. - - --- --- Data for Name: releases; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.releases (release_id, repo_id, release_name, release_description, release_author, release_created_at, release_published_at, release_updated_at, release_is_draft, release_is_prerelease, release_tag_name, release_url, tag_only, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. 
- - --- --- Data for Name: repo; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo (repo_id, repo_group_id, repo_git, repo_path, repo_name, repo_added, repo_status, repo_type, url, owner_id, description, primary_language, created_at, forked_from, updated_at, repo_archived_date_collected, repo_archived, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -25452 10 https://github.com/chaoss/whitepaper \N \N 2021-04-17 21:40:42 New \N \N \N \N \N Parent not available \N \N 0 CLI 1.0 Git 2021-04-17 21:40:42 -24441 10 https://github.com/operate-first/operate-first-twitter \N \N 2021-08-25 16:47:47 New \N \N \N \N \N Parent not available \N \N 0 CLI 1.0 Git 2021-08-25 16:47:47 -24442 10 https://github.com/operate-first/blueprint \N \N 2021-08-25 16:47:47 New \N \N \N \N \N Parent not available \N \N 0 CLI 1.0 Git 2021-08-25 16:47:47 -25445 10 https://github.com/chaoss/grimoirelab-perceval-opnfv \N \N 2020-04-17 21:40:39 New \N \N \N \N \N Parent not available \N \N 0 CLI 1.0 Git 2021-04-17 21:40:39 -1 1 https://github.com/chaoss/augur \N \N 2021-08-10 14:28:44 New \N \N \N \N \N Parent not available \N \N 0 data load one git 2021-06-05 18:41:14 -25430 10 https://github.com/SociallyCompute/update-test \N \N 2021-10-07 08:50:13 New \N \N \N \N \N Parent not available \N \N 0 \N \N \N \N -25450 10 https://github.com/chaoss/grimoirelab-hatstall \N \N 2021-04-17 21:40:42 New \N \N \N \N \N Parent not available \N \N 0 CLI 1.0 Git 2021-04-17 21:40:42 -\. - - --- --- Data for Name: repo_badging; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_badging (badge_collection_id, repo_id, created_at, tool_source, tool_version, data_source, data_collection_date, data) FROM stdin; -\. - - --- --- Data for Name: repo_cluster_messages; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_cluster_messages (msg_cluster_id, repo_id, cluster_content, cluster_mechanism, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_dependencies; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_dependencies (repo_dependencies_id, repo_id, dep_name, dep_count, dep_language, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_deps_libyear; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_deps_libyear (repo_deps_libyear_id, repo_id, name, requirement, type, package_manager, current_verion, latest_version, current_release_date, latest_release_date, libyear, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_deps_scorecard; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_deps_scorecard (repo_deps_scorecard_id, repo_id, name, status, score, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_group_insights; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_group_insights (rgi_id, repo_group_id, rgi_metric, rgi_value, cms_id, rgi_fresh, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. 
- - --- --- Data for Name: repo_groups; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_groups (repo_group_id, rg_name, rg_description, rg_website, rg_recache, rg_last_modified, rg_type, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -1 Default Repo Group The default repo group created by the schema generation script 0 2019-06-03 15:55:20 GitHub Organization load one git 2019-06-05 13:36:25 -10 Default Repo Group The default repo group created by the schema generation script 0 2021-06-03 15:55:20 GitHub Organization load one git 2019-06-05 13:36:25 -\. - - --- --- Data for Name: repo_groups_list_serve; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_groups_list_serve (rgls_id, repo_group_id, rgls_name, rgls_description, rgls_sponsor, rgls_email, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_info; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_info (repo_info_id, repo_id, last_updated, issues_enabled, open_issues, pull_requests_enabled, wiki_enabled, pages_enabled, fork_count, default_branch, watchers_count, "UUID", license, stars_count, committers_count, issue_contributors_count, changelog_file, contributing_file, license_file, code_of_conduct_file, security_issue_file, security_audit_file, status, keywords, commit_count, issues_count, issues_closed, pull_request_count, pull_requests_open, pull_requests_closed, pull_requests_merged, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_insights; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_insights (ri_id, repo_id, ri_metric, ri_value, ri_date, ri_fresh, tool_source, tool_version, data_source, data_collection_date, ri_score, ri_field, ri_detection_method) FROM stdin; -\. - - --- --- Data for Name: repo_insights_records; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_insights_records (ri_id, repo_id, ri_metric, ri_field, ri_value, ri_date, ri_score, ri_detection_method, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_labor; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_labor (repo_labor_id, repo_id, repo_clone_date, rl_analysis_date, programming_language, file_path, file_name, total_lines, code_lines, comment_lines, blank_lines, code_complexity, repo_url, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_meta; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_meta (repo_id, rmeta_id, rmeta_name, rmeta_value, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_sbom_scans; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_sbom_scans (rsb_id, repo_id, sbom_scan) FROM stdin; -\. - - --- --- Data for Name: repo_stats; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_stats (repo_id, rstat_id, rstat_name, rstat_value, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. 
- - --- --- Data for Name: repo_test_coverage; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_test_coverage (repo_id, repo_clone_date, rtc_analysis_date, programming_language, file_path, file_name, testing_tool, file_statement_count, file_subroutine_count, file_statements_tested, file_subroutines_tested, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_topic; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_topic (repo_topic_id, repo_id, topic_id, topic_prob, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repos_fetch_log; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repos_fetch_log (repos_id, status, date) FROM stdin; -\. - - --- --- Data for Name: settings; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.settings (id, setting, value, last_modified) FROM stdin; -5 report_date committer 2019-05-07 12:47:26 -6 report_attribution author 2019-05-07 12:47:26 -10 google_analytics disabled 2019-05-07 12:47:26 -11 update_frequency 24 2019-05-07 12:47:26 -12 database_version 7 2019-05-07 12:47:26 -13 results_visibility show 2019-05-07 12:47:26 -1 start_date 2001-01-01 1900-01-22 20:34:51 -4 log_level Debug 2019-05-07 12:47:26 -2 repo_directory /augur/repos/ 2019-05-07 12:47:26 -8 affiliations_processed 2001-08-26 10:03:29.815013+00 1900-01-22 20:36:27 -9 aliases_processed 2001-08-26 10:03:29.815013+00 1900-01-22 20:36:27 -7 working_author done 1900-01-22 20:23:43 -3 utility_status Idle 1900-01-22 20:38:07 -\. - - --- --- Data for Name: topic_words; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.topic_words (topic_words_id, topic_id, word, word_prob, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: unknown_cache; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.unknown_cache (type, repo_group_id, email, domain, added, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: unresolved_commit_emails; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.unresolved_commit_emails (email_unresolved_id, email, name, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: utility_log; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.utility_log (id, level, status, attempted) FROM stdin; -\. - - --- --- Data for Name: working_commits; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.working_commits (repos_id, working_commit) FROM stdin; -\. - - --- --- Data for Name: all; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations."all" ("Name", "Bytes", "Lines", "Code", "Comment", "Blank", "Complexity", "Count", "WeightedComplexity", "Files") FROM stdin; -\. - - --- --- Data for Name: augur_settings; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.augur_settings (id, setting, value, last_modified) FROM stdin; -1 augur_data_version 100 2021-10-12 08:41:51 -\. 
- - --- --- Data for Name: config; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.config (id, section_name, setting_name, value, type) FROM stdin; -1 Augur developer 0 int -2 Augur version 1 int -5 Facade check_updates 1 int -6 Facade clone_repos 1 int -7 Facade create_xlsx_summary_files 1 int -8 Facade delete_marked_repos 0 int -9 Facade fix_affiliations 1 int -10 Facade force_analysis 1 int -11 Facade force_invalidate_caches 1 int -12 Facade force_updates 1 int -13 Facade limited_run 0 int -14 Facade multithreaded 1 int -15 Facade nuke_stored_affiliations 0 int -16 Facade pull_repos 1 int -17 Facade rebuild_caches 1 int -18 Facade run_analysis 1 int -20 Server cache_expire 3600 str -21 Server host 0.0.0.0 str -22 Server port 5000 int -23 Server workers 6 int -24 Server timeout 6000 int -25 Server ssl false bool -26 Server ssl_cert_file \N \N -27 Server ssl_key_file \N \N -29 Logging log_level INFO str -30 Logging verbose 0 int -31 Logging quiet 0 int -32 Logging debug 0 int -33 Celery concurrency 12 int -34 Redis cache_group 0 int -35 Redis connection_string redis://localhost:6379/ str -19 Facade repo_directory /facade str -28 Logging logs_directory /logs str -3 Keys github_api_key 0 str -4 Keys gitlab_api_key 0 str -\. - - --- --- Data for Name: repos_fetch_log; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.repos_fetch_log (repos_id, status, date) FROM stdin; -\. - - --- --- Data for Name: users; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.users (user_id, login_name, login_hashword, email, text_phone, first_name, last_name, tool_source, tool_version, data_source, data_collection_date, admin) FROM stdin; -\. - - --- --- Data for Name: worker_history; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.worker_history (history_id, repo_id, worker, job_model, oauth_id, "timestamp", status, total_results) FROM stdin; -1 1 workers.repo_info_worker.50723 repo_info 0 2021-10-17 12:05:22 Success 1 -2 1 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:34:30 Success 0 -3 1 workers.github_worker.9396 repo_info 0 2021-12-20 10:34:32 Stopped 0 -4 1 workers.github_worker.9396 issues 0 2021-12-20 10:34:50 Error 0 -5 1 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:34:33 Success 1 -6 1 workers.insight_worker.9082 insights 0 2021-12-20 10:34:32 Success 0 -7 1 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:34:32 Success 1 -8 24441 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:36:31 Success 0 -9 24441 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:36:33 Success 0 -10 24441 workers.github_worker.9396 issues 0 2021-12-20 10:36:34 Error 0 -11 24441 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:36:34 Success 1 -12 24441 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:36:33 Success 0 -13 24441 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:36:33 Stopped 0 -14 24442 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:38:32 Success 0 -15 1 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:39:00 Success 0 -16 24442 workers.github_worker.9396 issues 0 2021-12-20 10:38:35 Error 0 -17 24442 workers.insight_worker.9082 insights 0 2021-12-20 10:38:33 Success 0 -18 24442 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:38:34 Success 1 -19 24442 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:39:00 Stopped 0 -20 25430 
workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:40:32 Success 0 -21 24442 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:40:33 Success 0 -22 25430 workers.insight_worker.9082 insights 0 2021-12-20 10:40:34 Success 0 -23 25430 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:40:34 Success 0 -24 25430 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:40:34 Stopped 0 -25 25430 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:40:35 Success 1 -26 25430 workers.insight_worker.9082 insights 0 2021-12-20 10:50:56 Success 0 -27 1 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:51:26 Success 0 -28 1 workers.github_worker.9396 issues 0 2021-12-20 10:51:14 Error 0 -29 1 workers.release_worker.9488 releases 0 2021-12-20 10:51:01 Success 1 -30 25430 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:50:56 Success 0 -31 25430 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:51:26 Success 0 -32 25430 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:51:26 Stopped 0 -33 24441 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:52:57 Success 0 -34 1 workers.insight_worker.9082 insights 0 2021-12-20 10:52:56 Success 0 -35 1 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:52:58 Success 1 -36 1 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:52:57 Success 1 -37 24441 workers.release_worker.9488 repo_info 0 2021-12-20 10:52:56 Stopped 0 -38 1 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:52:57 Success 0 -39 1 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:52:57 Stopped 0 -40 24441 workers.insight_worker.9082 insights 0 2021-12-20 10:54:59 Success 2 -41 24442 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:54:57 Success 0 -42 24442 workers.github_worker.9396 issues 0 2021-12-20 10:54:58 Error 0 -43 24441 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:54:58 Success 1 -44 24441 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:54:57 Success 0 -45 24441 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:54:57 Stopped 0 -46 25430 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:56:56 Success 0 -47 24442 workers.insight_worker.9082 insights 0 2021-12-20 10:56:57 Success 0 -48 24442 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:56:56 Success 0 -49 24442 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:56:56 Stopped 0 -50 24442 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:56:58 Success 1 -51 24442 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:56:58 Success 0 -52 24442 workers.insight_worker.9082 insights 0 2021-12-20 11:37:29 Success 0 -53 24442 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:37:28 Success 1 -54 1 workers.github_worker.9396 issues 0 2021-12-20 11:38:35 Error 0 -55 1 workers.release_worker.9488 releases 0 2021-12-20 11:37:32 Success 1 -56 24442 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 11:37:27 Success 0 -57 24442 workers.pull_request_worker.9145 repo_info 0 2021-12-20 11:37:27 Stopped 0 -58 1 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 11:38:25 Success 0 -59 25430 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:39:29 Success 1 -60 25430 workers.linux_badge_worker.9447 badges 0 2021-12-20 11:39:28 Success 0 -61 24441 workers.github_worker.9396 issues 0 2021-12-20 11:39:49 Success 0 -62 25430 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 11:39:27 Success 0 
-63 25430 workers.pull_request_worker.9145 repo_info 0 2021-12-20 11:39:27 Stopped 0 -64 24441 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 11:39:47 Success 0 -65 25430 workers.insight_worker.9082 insights 0 2021-12-20 11:39:29 Success 0 -66 1 workers.insight_worker.9082 insights 0 2021-12-20 11:41:32 Success 1 -67 1 workers.linux_badge_worker.9447 badges 0 2021-12-20 11:41:28 Success 1 -68 24442 workers.github_worker.9396 issues 0 2021-12-20 11:42:22 Success 0 -69 1 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:41:30 Success 1 -70 1 workers.pull_request_worker.9145 repo_info 0 2021-12-20 11:41:28 Stopped 0 -71 24442 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 11:42:05 Success 0 -72 24441 workers.insight_worker.9082 insights 0 2021-12-20 11:43:33 Success 1 -73 24441 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:43:29 Success 1 -74 25430 workers.github_worker.9396 issues 0 2021-12-20 11:43:31 Success 0 -75 24441 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 11:43:49 Success 0 -76 24441 workers.pull_request_worker.9145 repo_info 0 2021-12-20 11:43:49 Stopped 0 -77 25430 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 11:44:10 Success 0 -78 24441 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 11:52:46 Success 0 -79 24441 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:52:31 Success 1 -80 24441 workers.github_worker.9396 repo_info 0 2021-12-20 12:05:59 Stopped 0 -81 24442 workers.github_worker.9396 repo_info 0 2021-12-20 12:06:20 Stopped 0 -82 25430 workers.github_worker.9396 repo_info 0 2021-12-20 12:07:08 Stopped 0 -83 25430 workers.github_worker.9396 issues 0 2021-12-20 12:07:11 Success 0 -84 24441 workers.pull_request_worker.9145 repo_info 0 2021-12-20 11:53:12 Stopped 0 -85 24442 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 11:54:45 Success 0 -86 24442 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:54:28 Success 1 -87 24442 workers.linux_badge_worker.9447 badges 0 2021-12-20 11:54:28 Success 0 -88 24441 workers.release_worker.9488 repo_info 0 2021-12-20 11:54:28 Stopped 0 -89 24442 workers.insight_worker.9082 insights 0 2021-12-20 11:54:31 Success 0 -90 24442 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 12:16:33 Success 0 -91 25430 workers.linux_badge_worker.9447 badges 0 2021-12-20 11:56:31 Success 0 -92 25430 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:56:32 Success 1 -93 25430 workers.insight_worker.9082 insights 0 2021-12-20 11:56:34 Success 0 -94 1 workers.linux_badge_worker.9447 badges 0 2021-12-20 11:58:31 Success 1 -95 1 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:58:33 Success 1 -96 1 workers.insight_worker.9082 insights 0 2021-12-20 11:58:34 Success 0 -97 24442 workers.pull_request_worker.9145 repo_info 0 2021-12-20 12:16:33 Stopped 0 -98 25430 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 12:17:02 Success 0 -99 25430 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 12:17:06 Success 0 -100 25430 workers.pull_request_worker.9145 repo_info 0 2021-12-20 12:17:07 Stopped 0 -101 1 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 12:17:08 Success 0 -102 1 workers.pull_request_worker.9145 repo_info 0 2021-12-20 12:17:08 Stopped 0 -103 1 workers.insight_worker.9082 insights 0 2021-12-20 12:43:41 Success 0 -104 25430 workers.pull_request_worker.9145 repo_info 0 2021-12-20 12:43:38 Stopped 0 -105 24441 workers.github_worker.9396 repo_info 14 2021-12-20 12:55:23 Stopped 0 
-106 24442 workers.github_worker.9396 repo_info 14 2021-12-20 12:55:44 Stopped 0 -107 25430 workers.github_worker.9396 repo_info 14 2021-12-20 12:56:32 Stopped 0 -108 1 workers.pull_request_worker.9145 pull_requests 1017 2021-12-20 12:58:53 Success 0 -109 24441 workers.insight_worker.9082 insights 0 2021-12-20 12:45:43 Success 1 -110 24441 workers.linux_badge_worker.9447 badges 0 2021-12-20 12:45:39 Success 0 -111 24441 workers.release_worker.9488 repo_info 0 2021-12-20 12:45:38 Stopped 0 -112 24441 workers.repo_info_worker.9176 repo_info 0 2021-12-20 12:45:41 Success 1 -113 24442 workers.insight_worker.9082 insights 0 2021-12-20 12:47:41 Success 0 -114 24442 workers.repo_info_worker.9176 repo_info 1022 2021-12-20 12:47:48 Success 1 -115 24442 workers.linux_badge_worker.9447 badges 0 2021-12-20 12:47:40 Success 0 -116 25430 workers.insight_worker.9082 insights 0 2021-12-20 12:49:41 Success 0 -117 25430 workers.repo_info_worker.9176 repo_info 14 2021-12-20 12:49:48 Success 1 -118 25430 workers.linux_badge_worker.9447 badges 0 2021-12-20 12:49:40 Success 0 -119 1 workers.pull_request_worker.9145 pull_request_files 1017 2021-12-20 13:09:07 Success 0 -120 1 workers.pull_request_worker.9145 repo_info 1017 2021-12-20 13:09:07 Stopped 0 -\. - - --- --- Data for Name: worker_job; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.worker_job (job_model, state, zombie_head, since_id_str, description, last_count, last_run, analysis_state, oauth_id) FROM stdin; -\. - - --- --- Data for Name: worker_oauth; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.worker_oauth (oauth_id, name, consumer_key, consumer_secret, access_token, access_token_secret, repo_directory, platform) FROM stdin; -\. - - --- --- Data for Name: worker_settings_facade; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.worker_settings_facade (id, setting, value, last_modified) FROM stdin; -\. - - --- --- Data for Name: working_commits; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.working_commits (repos_id, working_commit) FROM stdin; -\. - - --- --- Data for Name: alembic_version; Type: TABLE DATA; Schema: public; Owner: augur --- - -COPY public.alembic_version (version_num) FROM stdin; -11 -\. - - --- --- Data for Name: annotation_types; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.annotation_types (annotation_type_id, name) FROM stdin; -1 REVIEW -2 OTHER -\. - - --- --- Data for Name: annotations; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.annotations (annotation_id, document_id, annotation_type_id, identifier_id, creator_id, created_ts, comment) FROM stdin; -\. - - --- --- Data for Name: augur_repo_map; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.augur_repo_map (map_id, dosocs_pkg_id, dosocs_pkg_name, repo_id, repo_path) FROM stdin; -\. - - --- --- Data for Name: creator_types; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.creator_types (creator_type_id, name) FROM stdin; -1 Person -2 Organization -3 Tool -\. - - --- --- Data for Name: creators; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.creators (creator_id, creator_type_id, name, email) FROM stdin; -1 3 dosocs2-0.16.1 -\. - - --- --- Data for Name: document_namespaces; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.document_namespaces (document_namespace_id, uri) FROM stdin; -\. 
-
-
---
--- Data for Name: documents; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.documents (document_id, document_namespace_id, data_license_id, spdx_version, name, license_list_version, created_ts, creator_comment, document_comment, package_id) FROM stdin;
-\.
-
-
---
--- Data for Name: documents_creators; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.documents_creators (document_creator_id, document_id, creator_id) FROM stdin;
-\.
-
-
---
--- Data for Name: external_refs; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.external_refs (external_ref_id, document_id, document_namespace_id, id_string, sha256) FROM stdin;
-\.
-
-
---
--- Data for Name: file_contributors; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.file_contributors (file_contributor_id, file_id, contributor) FROM stdin;
-\.
-
-
---
--- Data for Name: file_types; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.file_types (file_type_id, name) FROM stdin;
-4	APPLICATION
-3	ARCHIVE
-5	AUDIO
-2	BINARY
-9	DOCUMENTATION
-6	IMAGE
-11	OTHER
-1	SOURCE
-10	SPDX
-7	TEXT
-8	VIDEO
-\.
-
-
---
--- Data for Name: files; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.files (file_id, file_type_id, sha256, copyright_text, package_id, comment, notice) FROM stdin;
-\.
-
-
---
--- Data for Name: files_licenses; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.files_licenses (file_license_id, file_id, license_id, extracted_text) FROM stdin;
-\.
-
-
---
--- Data for Name: files_scans; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.files_scans (file_scan_id, file_id, scanner_id) FROM stdin;
-\.
-
-
---
--- Data for Name: identifiers; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.identifiers (identifier_id, document_namespace_id, id_string, document_id, package_id, package_file_id) FROM stdin;
-\.
- - --- --- Data for Name: licenses; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.licenses (license_id, name, short_name, cross_reference, comment, is_spdx_official) FROM stdin; -1 3dfx Glide License Glide http://spdx.org/licenses/Glide.html t -2 Abstyles License Abstyles http://spdx.org/licenses/Abstyles.html t -3 Academic Free License v1.1 AFL-1.1 http://spdx.org/licenses/AFL-1.1.html t -4 Academic Free License v1.2 AFL-1.2 http://spdx.org/licenses/AFL-1.2.html t -5 Academic Free License v2.0 AFL-2.0 http://spdx.org/licenses/AFL-2.0.html t -6 Academic Free License v2.1 AFL-2.1 http://spdx.org/licenses/AFL-2.1.html t -7 Academic Free License v3.0 AFL-3.0 http://spdx.org/licenses/AFL-3.0.html t -8 Academy of Motion Picture Arts and Sciences BSD AMPAS http://spdx.org/licenses/AMPAS.html t -9 Adaptive Public License 1.0 APL-1.0 http://spdx.org/licenses/APL-1.0.html t -10 Adobe Glyph List License Adobe-Glyph http://spdx.org/licenses/Adobe-Glyph.html t -11 Adobe Postscript AFM License APAFML http://spdx.org/licenses/APAFML.html t -12 Adobe Systems Incorporated Source Code License Agreement Adobe-2006 http://spdx.org/licenses/Adobe-2006.html t -13 Affero General Public License v1.0 AGPL-1.0 http://spdx.org/licenses/AGPL-1.0.html t -14 Afmparse License Afmparse http://spdx.org/licenses/Afmparse.html t -15 Aladdin Free Public License Aladdin http://spdx.org/licenses/Aladdin.html t -16 Amazon Digital Services License ADSL http://spdx.org/licenses/ADSL.html t -17 AMD's plpa_map.c License AMDPLPA http://spdx.org/licenses/AMDPLPA.html t -18 ANTLR Software Rights Notice ANTLR-PD http://spdx.org/licenses/ANTLR-PD.html t -19 Apache License 1.0 Apache-1.0 http://spdx.org/licenses/Apache-1.0.html t -20 Apache License 1.1 Apache-1.1 http://spdx.org/licenses/Apache-1.1.html t -21 Apache License 2.0 Apache-2.0 http://spdx.org/licenses/Apache-2.0.html t -22 Apple MIT License AML http://spdx.org/licenses/AML.html t -23 Apple Public Source License 1.0 APSL-1.0 http://spdx.org/licenses/APSL-1.0.html t -24 Apple Public Source License 1.1 APSL-1.1 http://spdx.org/licenses/APSL-1.1.html t -25 Apple Public Source License 1.2 APSL-1.2 http://spdx.org/licenses/APSL-1.2.html t -26 Apple Public Source License 2.0 APSL-2.0 http://spdx.org/licenses/APSL-2.0.html t -27 Artistic License 1.0 Artistic-1.0 http://spdx.org/licenses/Artistic-1.0.html t -28 Artistic License 1.0 (Perl) Artistic-1.0-Perl http://spdx.org/licenses/Artistic-1.0-Perl.html t -29 Artistic License 1.0 w/clause 8 Artistic-1.0-cl8 http://spdx.org/licenses/Artistic-1.0-cl8.html t -30 Artistic License 2.0 Artistic-2.0 http://spdx.org/licenses/Artistic-2.0.html t -31 Attribution Assurance License AAL http://spdx.org/licenses/AAL.html t -32 Bahyph License Bahyph http://spdx.org/licenses/Bahyph.html t -33 Barr License Barr http://spdx.org/licenses/Barr.html t -34 Beerware License Beerware http://spdx.org/licenses/Beerware.html t -35 BitTorrent Open Source License v1.0 BitTorrent-1.0 http://spdx.org/licenses/BitTorrent-1.0.html t -36 BitTorrent Open Source License v1.1 BitTorrent-1.1 http://spdx.org/licenses/BitTorrent-1.1.html t -37 Boost Software License 1.0 BSL-1.0 http://spdx.org/licenses/BSL-1.0.html t -38 Borceux license Borceux http://spdx.org/licenses/Borceux.html t -39 BSD 2-clause "Simplified" License BSD-2-Clause http://spdx.org/licenses/BSD-2-Clause.html t -40 BSD 2-clause FreeBSD License BSD-2-Clause-FreeBSD http://spdx.org/licenses/BSD-2-Clause-FreeBSD.html t -41 BSD 2-clause NetBSD License BSD-2-Clause-NetBSD 
http://spdx.org/licenses/BSD-2-Clause-NetBSD.html t -42 BSD 3-clause "New" or "Revised" License BSD-3-Clause http://spdx.org/licenses/BSD-3-Clause.html t -43 BSD 3-clause Clear License BSD-3-Clause-Clear http://spdx.org/licenses/BSD-3-Clause-Clear.html t -44 BSD 4-clause "Original" or "Old" License BSD-4-Clause http://spdx.org/licenses/BSD-4-Clause.html t -45 BSD Protection License BSD-Protection http://spdx.org/licenses/BSD-Protection.html t -46 BSD with attribution BSD-3-Clause-Attribution http://spdx.org/licenses/BSD-3-Clause-Attribution.html t -47 BSD Zero Clause License 0BSD http://spdx.org/licenses/0BSD.html t -48 BSD-4-Clause (University of California-Specific) BSD-4-Clause-UC http://spdx.org/licenses/BSD-4-Clause-UC.html t -49 bzip2 and libbzip2 License v1.0.5 bzip2-1.0.5 http://spdx.org/licenses/bzip2-1.0.5.html t -50 bzip2 and libbzip2 License v1.0.6 bzip2-1.0.6 http://spdx.org/licenses/bzip2-1.0.6.html t -51 Caldera License Caldera http://spdx.org/licenses/Caldera.html t -52 CeCILL Free Software License Agreement v1.0 CECILL-1.0 http://spdx.org/licenses/CECILL-1.0.html t -53 CeCILL Free Software License Agreement v1.1 CECILL-1.1 http://spdx.org/licenses/CECILL-1.1.html t -54 CeCILL Free Software License Agreement v2.0 CECILL-2.0 http://spdx.org/licenses/CECILL-2.0.html t -55 CeCILL Free Software License Agreement v2.1 CECILL-2.1 http://spdx.org/licenses/CECILL-2.1.html t -56 CeCILL-B Free Software License Agreement CECILL-B http://spdx.org/licenses/CECILL-B.html t -57 CeCILL-C Free Software License Agreement CECILL-C http://spdx.org/licenses/CECILL-C.html t -58 Clarified Artistic License ClArtistic http://spdx.org/licenses/ClArtistic.html t -59 CMU License MIT-CMU http://spdx.org/licenses/MIT-CMU.html t -60 CNRI Jython License CNRI-Jython http://spdx.org/licenses/CNRI-Jython.html t -61 CNRI Python License CNRI-Python http://spdx.org/licenses/CNRI-Python.html t -62 CNRI Python Open Source GPL Compatible License Agreement CNRI-Python-GPL-Compatible http://spdx.org/licenses/CNRI-Python-GPL-Compatible.html t -63 Code Project Open License 1.02 CPOL-1.02 http://spdx.org/licenses/CPOL-1.02.html t -64 Common Development and Distribution License 1.0 CDDL-1.0 http://spdx.org/licenses/CDDL-1.0.html t -65 Common Development and Distribution License 1.1 CDDL-1.1 http://spdx.org/licenses/CDDL-1.1.html t -66 Common Public Attribution License 1.0 CPAL-1.0 http://spdx.org/licenses/CPAL-1.0.html t -67 Common Public License 1.0 CPL-1.0 http://spdx.org/licenses/CPL-1.0.html t -68 Computer Associates Trusted Open Source License 1.1 CATOSL-1.1 http://spdx.org/licenses/CATOSL-1.1.html t -69 Condor Public License v1.1 Condor-1.1 http://spdx.org/licenses/Condor-1.1.html t -70 Creative Commons Attribution 1.0 CC-BY-1.0 http://spdx.org/licenses/CC-BY-1.0.html t -71 Creative Commons Attribution 2.0 CC-BY-2.0 http://spdx.org/licenses/CC-BY-2.0.html t -72 Creative Commons Attribution 2.5 CC-BY-2.5 http://spdx.org/licenses/CC-BY-2.5.html t -73 Creative Commons Attribution 3.0 CC-BY-3.0 http://spdx.org/licenses/CC-BY-3.0.html t -74 Creative Commons Attribution 4.0 CC-BY-4.0 http://spdx.org/licenses/CC-BY-4.0.html t -75 Creative Commons Attribution No Derivatives 1.0 CC-BY-ND-1.0 http://spdx.org/licenses/CC-BY-ND-1.0.html t -76 Creative Commons Attribution No Derivatives 2.0 CC-BY-ND-2.0 http://spdx.org/licenses/CC-BY-ND-2.0.html t -77 Creative Commons Attribution No Derivatives 2.5 CC-BY-ND-2.5 http://spdx.org/licenses/CC-BY-ND-2.5.html t -78 Creative Commons Attribution No Derivatives 3.0 CC-BY-ND-3.0 
http://spdx.org/licenses/CC-BY-ND-3.0.html t -79 Creative Commons Attribution No Derivatives 4.0 CC-BY-ND-4.0 http://spdx.org/licenses/CC-BY-ND-4.0.html t -80 Creative Commons Attribution Non Commercial 1.0 CC-BY-NC-1.0 http://spdx.org/licenses/CC-BY-NC-1.0.html t -81 Creative Commons Attribution Non Commercial 2.0 CC-BY-NC-2.0 http://spdx.org/licenses/CC-BY-NC-2.0.html t -82 Creative Commons Attribution Non Commercial 2.5 CC-BY-NC-2.5 http://spdx.org/licenses/CC-BY-NC-2.5.html t -83 Creative Commons Attribution Non Commercial 3.0 CC-BY-NC-3.0 http://spdx.org/licenses/CC-BY-NC-3.0.html t -84 Creative Commons Attribution Non Commercial 4.0 CC-BY-NC-4.0 http://spdx.org/licenses/CC-BY-NC-4.0.html t -85 Creative Commons Attribution Non Commercial No Derivatives 1.0 CC-BY-NC-ND-1.0 http://spdx.org/licenses/CC-BY-NC-ND-1.0.html t -86 Creative Commons Attribution Non Commercial No Derivatives 2.0 CC-BY-NC-ND-2.0 http://spdx.org/licenses/CC-BY-NC-ND-2.0.html t -87 Creative Commons Attribution Non Commercial No Derivatives 2.5 CC-BY-NC-ND-2.5 http://spdx.org/licenses/CC-BY-NC-ND-2.5.html t -88 Creative Commons Attribution Non Commercial No Derivatives 3.0 CC-BY-NC-ND-3.0 http://spdx.org/licenses/CC-BY-NC-ND-3.0.html t -89 Creative Commons Attribution Non Commercial No Derivatives 4.0 CC-BY-NC-ND-4.0 http://spdx.org/licenses/CC-BY-NC-ND-4.0.html t -90 Creative Commons Attribution Non Commercial Share Alike 1.0 CC-BY-NC-SA-1.0 http://spdx.org/licenses/CC-BY-NC-SA-1.0.html t -91 Creative Commons Attribution Non Commercial Share Alike 2.0 CC-BY-NC-SA-2.0 http://spdx.org/licenses/CC-BY-NC-SA-2.0.html t -92 Creative Commons Attribution Non Commercial Share Alike 2.5 CC-BY-NC-SA-2.5 http://spdx.org/licenses/CC-BY-NC-SA-2.5.html t -93 Creative Commons Attribution Non Commercial Share Alike 3.0 CC-BY-NC-SA-3.0 http://spdx.org/licenses/CC-BY-NC-SA-3.0.html t -94 Creative Commons Attribution Non Commercial Share Alike 4.0 CC-BY-NC-SA-4.0 http://spdx.org/licenses/CC-BY-NC-SA-4.0.html t -95 Creative Commons Attribution Share Alike 1.0 CC-BY-SA-1.0 http://spdx.org/licenses/CC-BY-SA-1.0.html t -96 Creative Commons Attribution Share Alike 2.0 CC-BY-SA-2.0 http://spdx.org/licenses/CC-BY-SA-2.0.html t -97 Creative Commons Attribution Share Alike 2.5 CC-BY-SA-2.5 http://spdx.org/licenses/CC-BY-SA-2.5.html t -98 Creative Commons Attribution Share Alike 3.0 CC-BY-SA-3.0 http://spdx.org/licenses/CC-BY-SA-3.0.html t -99 Creative Commons Attribution Share Alike 4.0 CC-BY-SA-4.0 http://spdx.org/licenses/CC-BY-SA-4.0.html t -100 Creative Commons Zero v1.0 Universal CC0-1.0 http://spdx.org/licenses/CC0-1.0.html t -101 Crossword License Crossword http://spdx.org/licenses/Crossword.html t -102 CrystalStacker License CrystalStacker http://spdx.org/licenses/CrystalStacker.html t -103 CUA Office Public License v1.0 CUA-OPL-1.0 http://spdx.org/licenses/CUA-OPL-1.0.html t -104 Cube License Cube http://spdx.org/licenses/Cube.html t -105 Deutsche Freie Software Lizenz D-FSL-1.0 http://spdx.org/licenses/D-FSL-1.0.html t -106 diffmark license diffmark http://spdx.org/licenses/diffmark.html t -107 Do What The F*ck You Want To Public License WTFPL http://spdx.org/licenses/WTFPL.html t -108 DOC License DOC http://spdx.org/licenses/DOC.html t -109 Dotseqn License Dotseqn http://spdx.org/licenses/Dotseqn.html t -110 DSDP License DSDP http://spdx.org/licenses/DSDP.html t -111 dvipdfm License dvipdfm http://spdx.org/licenses/dvipdfm.html t -112 Eclipse Public License 1.0 EPL-1.0 http://spdx.org/licenses/EPL-1.0.html t -113 Educational 
Community License v1.0 ECL-1.0 http://spdx.org/licenses/ECL-1.0.html t -114 Educational Community License v2.0 ECL-2.0 http://spdx.org/licenses/ECL-2.0.html t -115 eGenix.com Public License 1.1.0 eGenix http://spdx.org/licenses/eGenix.html t -116 Eiffel Forum License v1.0 EFL-1.0 http://spdx.org/licenses/EFL-1.0.html t -117 Eiffel Forum License v2.0 EFL-2.0 http://spdx.org/licenses/EFL-2.0.html t -118 Enlightenment License (e16) MIT-advertising http://spdx.org/licenses/MIT-advertising.html t -119 enna License MIT-enna http://spdx.org/licenses/MIT-enna.html t -120 Entessa Public License v1.0 Entessa http://spdx.org/licenses/Entessa.html t -121 Erlang Public License v1.1 ErlPL-1.1 http://spdx.org/licenses/ErlPL-1.1.html t -122 EU DataGrid Software License EUDatagrid http://spdx.org/licenses/EUDatagrid.html t -123 European Union Public License 1.0 EUPL-1.0 http://spdx.org/licenses/EUPL-1.0.html t -124 European Union Public License 1.1 EUPL-1.1 http://spdx.org/licenses/EUPL-1.1.html t -125 Eurosym License Eurosym http://spdx.org/licenses/Eurosym.html t -126 Fair License Fair http://spdx.org/licenses/Fair.html t -127 feh License MIT-feh http://spdx.org/licenses/MIT-feh.html t -128 Frameworx Open License 1.0 Frameworx-1.0 http://spdx.org/licenses/Frameworx-1.0.html t -129 FreeImage Public License v1.0 FreeImage http://spdx.org/licenses/FreeImage.html t -130 Freetype Project License FTL http://spdx.org/licenses/FTL.html t -131 FSF Unlimited License FSFUL http://spdx.org/licenses/FSFUL.html t -132 FSF Unlimited License (with License Retention) FSFULLR http://spdx.org/licenses/FSFULLR.html t -133 Giftware License Giftware http://spdx.org/licenses/Giftware.html t -134 GL2PS License GL2PS http://spdx.org/licenses/GL2PS.html t -135 Glulxe License Glulxe http://spdx.org/licenses/Glulxe.html t -136 GNU Affero General Public License v3.0 AGPL-3.0 http://spdx.org/licenses/AGPL-3.0.html t -137 GNU Free Documentation License v1.1 GFDL-1.1 http://spdx.org/licenses/GFDL-1.1.html t -138 GNU Free Documentation License v1.2 GFDL-1.2 http://spdx.org/licenses/GFDL-1.2.html t -139 GNU Free Documentation License v1.3 GFDL-1.3 http://spdx.org/licenses/GFDL-1.3.html t -140 GNU General Public License v1.0 only GPL-1.0 http://spdx.org/licenses/GPL-1.0.html t -141 GNU General Public License v2.0 only GPL-2.0 http://spdx.org/licenses/GPL-2.0.html t -142 GNU General Public License v3.0 only GPL-3.0 http://spdx.org/licenses/GPL-3.0.html t -143 GNU Lesser General Public License v2.1 only LGPL-2.1 http://spdx.org/licenses/LGPL-2.1.html t -144 GNU Lesser General Public License v3.0 only LGPL-3.0 http://spdx.org/licenses/LGPL-3.0.html t -145 GNU Library General Public License v2 only LGPL-2.0 http://spdx.org/licenses/LGPL-2.0.html t -146 gnuplot License gnuplot http://spdx.org/licenses/gnuplot.html t -147 gSOAP Public License v1.3b gSOAP-1.3b http://spdx.org/licenses/gSOAP-1.3b.html t -148 Haskell Language Report License HaskellReport http://spdx.org/licenses/HaskellReport.html t -149 Historic Permission Notice and Disclaimer HPND http://spdx.org/licenses/HPND.html t -150 IBM PowerPC Initialization and Boot Software IBM-pibs http://spdx.org/licenses/IBM-pibs.html t -151 IBM Public License v1.0 IPL-1.0 http://spdx.org/licenses/IPL-1.0.html t -152 ICU License ICU http://spdx.org/licenses/ICU.html t -153 ImageMagick License ImageMagick http://spdx.org/licenses/ImageMagick.html t -154 iMatix Standard Function Library Agreement iMatix http://spdx.org/licenses/iMatix.html t -155 Imlib2 License Imlib2 
http://spdx.org/licenses/Imlib2.html t -156 Independent JPEG Group License IJG http://spdx.org/licenses/IJG.html t -157 Intel ACPI Software License Agreement Intel-ACPI http://spdx.org/licenses/Intel-ACPI.html t -158 Intel Open Source License Intel http://spdx.org/licenses/Intel.html t -159 Interbase Public License v1.0 Interbase-1.0 http://spdx.org/licenses/Interbase-1.0.html t -160 IPA Font License IPA http://spdx.org/licenses/IPA.html t -161 ISC License ISC http://spdx.org/licenses/ISC.html t -162 JasPer License JasPer-2.0 http://spdx.org/licenses/JasPer-2.0.html t -163 JSON License JSON http://spdx.org/licenses/JSON.html t -164 LaTeX Project Public License 1.3a LPPL-1.3a http://spdx.org/licenses/LPPL-1.3a.html t -165 LaTeX Project Public License v1.0 LPPL-1.0 http://spdx.org/licenses/LPPL-1.0.html t -166 LaTeX Project Public License v1.1 LPPL-1.1 http://spdx.org/licenses/LPPL-1.1.html t -167 LaTeX Project Public License v1.2 LPPL-1.2 http://spdx.org/licenses/LPPL-1.2.html t -168 LaTeX Project Public License v1.3c LPPL-1.3c http://spdx.org/licenses/LPPL-1.3c.html t -169 Latex2e License Latex2e http://spdx.org/licenses/Latex2e.html t -170 Lawrence Berkeley National Labs BSD variant license BSD-3-Clause-LBNL http://spdx.org/licenses/BSD-3-Clause-LBNL.html t -171 Leptonica License Leptonica http://spdx.org/licenses/Leptonica.html t -172 Lesser General Public License For Linguistic Resources LGPLLR http://spdx.org/licenses/LGPLLR.html t -173 libpng License Libpng http://spdx.org/licenses/Libpng.html t -174 libtiff License libtiff http://spdx.org/licenses/libtiff.html t -175 Lucent Public License v1.02 LPL-1.02 http://spdx.org/licenses/LPL-1.02.html t -176 Lucent Public License Version 1.0 LPL-1.0 http://spdx.org/licenses/LPL-1.0.html t -177 MakeIndex License MakeIndex http://spdx.org/licenses/MakeIndex.html t -178 Matrix Template Library License MTLL http://spdx.org/licenses/MTLL.html t -179 Microsoft Public License MS-PL http://spdx.org/licenses/MS-PL.html t -180 Microsoft Reciprocal License MS-RL http://spdx.org/licenses/MS-RL.html t -181 MirOS Licence MirOS http://spdx.org/licenses/MirOS.html t -182 MIT +no-false-attribs license MITNFA http://spdx.org/licenses/MITNFA.html t -183 MIT License MIT http://spdx.org/licenses/MIT.html t -184 Motosoto License Motosoto http://spdx.org/licenses/Motosoto.html t -185 Mozilla Public License 1.0 MPL-1.0 http://spdx.org/licenses/MPL-1.0.html t -186 Mozilla Public License 1.1 MPL-1.1 http://spdx.org/licenses/MPL-1.1.html t -187 Mozilla Public License 2.0 MPL-2.0 http://spdx.org/licenses/MPL-2.0.html t -188 Mozilla Public License 2.0 (no copyleft exception) MPL-2.0-no-copyleft-exception http://spdx.org/licenses/MPL-2.0-no-copyleft-exception.html t -189 mpich2 License mpich2 http://spdx.org/licenses/mpich2.html t -190 Multics License Multics http://spdx.org/licenses/Multics.html t -191 Mup License Mup http://spdx.org/licenses/Mup.html t -192 NASA Open Source Agreement 1.3 NASA-1.3 http://spdx.org/licenses/NASA-1.3.html t -193 Naumen Public License Naumen http://spdx.org/licenses/Naumen.html t -194 Net Boolean Public License v1 NBPL-1.0 http://spdx.org/licenses/NBPL-1.0.html t -195 NetCDF license NetCDF http://spdx.org/licenses/NetCDF.html t -196 Nethack General Public License NGPL http://spdx.org/licenses/NGPL.html t -197 Netizen Open Source License NOSL http://spdx.org/licenses/NOSL.html t -198 Netscape Public License v1.0 NPL-1.0 http://spdx.org/licenses/NPL-1.0.html t -199 Netscape Public License v1.1 NPL-1.1 http://spdx.org/licenses/NPL-1.1.html t 
-200 Newsletr License Newsletr http://spdx.org/licenses/Newsletr.html t -201 No Limit Public License NLPL http://spdx.org/licenses/NLPL.html t -202 Nokia Open Source License Nokia http://spdx.org/licenses/Nokia.html t -203 Non-Profit Open Software License 3.0 NPOSL-3.0 http://spdx.org/licenses/NPOSL-3.0.html t -204 Noweb License Noweb http://spdx.org/licenses/Noweb.html t -205 NRL License NRL http://spdx.org/licenses/NRL.html t -206 NTP License NTP http://spdx.org/licenses/NTP.html t -207 Nunit License Nunit http://spdx.org/licenses/Nunit.html t -208 OCLC Research Public License 2.0 OCLC-2.0 http://spdx.org/licenses/OCLC-2.0.html t -209 ODC Open Database License v1.0 ODbL-1.0 http://spdx.org/licenses/ODbL-1.0.html t -210 ODC Public Domain Dedication & License 1.0 PDDL-1.0 http://spdx.org/licenses/PDDL-1.0.html t -211 Open Group Test Suite License OGTSL http://spdx.org/licenses/OGTSL.html t -212 Open LDAP Public License 2.2.2 OLDAP-2.2.2 http://spdx.org/licenses/OLDAP-2.2.2.html t -213 Open LDAP Public License v1.1 OLDAP-1.1 http://spdx.org/licenses/OLDAP-1.1.html t -214 Open LDAP Public License v1.2 OLDAP-1.2 http://spdx.org/licenses/OLDAP-1.2.html t -215 Open LDAP Public License v1.3 OLDAP-1.3 http://spdx.org/licenses/OLDAP-1.3.html t -216 Open LDAP Public License v1.4 OLDAP-1.4 http://spdx.org/licenses/OLDAP-1.4.html t -217 Open LDAP Public License v2.0 (or possibly 2.0A and 2.0B) OLDAP-2.0 http://spdx.org/licenses/OLDAP-2.0.html t -218 Open LDAP Public License v2.0.1 OLDAP-2.0.1 http://spdx.org/licenses/OLDAP-2.0.1.html t -219 Open LDAP Public License v2.1 OLDAP-2.1 http://spdx.org/licenses/OLDAP-2.1.html t -220 Open LDAP Public License v2.2 OLDAP-2.2 http://spdx.org/licenses/OLDAP-2.2.html t -221 Open LDAP Public License v2.2.1 OLDAP-2.2.1 http://spdx.org/licenses/OLDAP-2.2.1.html t -222 Open LDAP Public License v2.3 OLDAP-2.3 http://spdx.org/licenses/OLDAP-2.3.html t -223 Open LDAP Public License v2.4 OLDAP-2.4 http://spdx.org/licenses/OLDAP-2.4.html t -224 Open LDAP Public License v2.5 OLDAP-2.5 http://spdx.org/licenses/OLDAP-2.5.html t -225 Open LDAP Public License v2.6 OLDAP-2.6 http://spdx.org/licenses/OLDAP-2.6.html t -226 Open LDAP Public License v2.7 OLDAP-2.7 http://spdx.org/licenses/OLDAP-2.7.html t -227 Open LDAP Public License v2.8 OLDAP-2.8 http://spdx.org/licenses/OLDAP-2.8.html t -228 Open Market License OML http://spdx.org/licenses/OML.html t -229 Open Public License v1.0 OPL-1.0 http://spdx.org/licenses/OPL-1.0.html t -230 Open Software License 1.0 OSL-1.0 http://spdx.org/licenses/OSL-1.0.html t -231 Open Software License 1.1 OSL-1.1 http://spdx.org/licenses/OSL-1.1.html t -232 Open Software License 2.0 OSL-2.0 http://spdx.org/licenses/OSL-2.0.html t -233 Open Software License 2.1 OSL-2.1 http://spdx.org/licenses/OSL-2.1.html t -234 Open Software License 3.0 OSL-3.0 http://spdx.org/licenses/OSL-3.0.html t -235 OpenSSL License OpenSSL http://spdx.org/licenses/OpenSSL.html t -236 PHP License v3.0 PHP-3.0 http://spdx.org/licenses/PHP-3.0.html t -237 PHP License v3.01 PHP-3.01 http://spdx.org/licenses/PHP-3.01.html t -238 Plexus Classworlds License Plexus http://spdx.org/licenses/Plexus.html t -239 PostgreSQL License PostgreSQL http://spdx.org/licenses/PostgreSQL.html t -240 psfrag License psfrag http://spdx.org/licenses/psfrag.html t -241 psutils License psutils http://spdx.org/licenses/psutils.html t -242 Python License 2.0 Python-2.0 http://spdx.org/licenses/Python-2.0.html t -243 Q Public License 1.0 QPL-1.0 http://spdx.org/licenses/QPL-1.0.html t -244 Qhull License 
Qhull http://spdx.org/licenses/Qhull.html t -245 Rdisc License Rdisc http://spdx.org/licenses/Rdisc.html t -246 RealNetworks Public Source License v1.0 RPSL-1.0 http://spdx.org/licenses/RPSL-1.0.html t -247 Reciprocal Public License 1.1 RPL-1.1 http://spdx.org/licenses/RPL-1.1.html t -248 Reciprocal Public License 1.5 RPL-1.5 http://spdx.org/licenses/RPL-1.5.html t -249 Red Hat eCos Public License v1.1 RHeCos-1.1 http://spdx.org/licenses/RHeCos-1.1.html t -250 Ricoh Source Code Public License RSCPL http://spdx.org/licenses/RSCPL.html t -251 RSA Message-Digest License RSA-MD http://spdx.org/licenses/RSA-MD.html t -252 Ruby License Ruby http://spdx.org/licenses/Ruby.html t -253 Sax Public Domain Notice SAX-PD http://spdx.org/licenses/SAX-PD.html t -254 Saxpath License Saxpath http://spdx.org/licenses/Saxpath.html t -255 SCEA Shared Source License SCEA http://spdx.org/licenses/SCEA.html t -256 Scheme Widget Library (SWL) Software License Agreement SWL http://spdx.org/licenses/SWL.html t -257 Sendmail License Sendmail http://spdx.org/licenses/Sendmail.html t -258 SGI Free Software License B v1.0 SGI-B-1.0 http://spdx.org/licenses/SGI-B-1.0.html t -259 SGI Free Software License B v1.1 SGI-B-1.1 http://spdx.org/licenses/SGI-B-1.1.html t -260 SGI Free Software License B v2.0 SGI-B-2.0 http://spdx.org/licenses/SGI-B-2.0.html t -261 SIL Open Font License 1.0 OFL-1.0 http://spdx.org/licenses/OFL-1.0.html t -262 SIL Open Font License 1.1 OFL-1.1 http://spdx.org/licenses/OFL-1.1.html t -263 Simple Public License 2.0 SimPL-2.0 http://spdx.org/licenses/SimPL-2.0.html t -264 Sleepycat License Sleepycat http://spdx.org/licenses/Sleepycat.html t -265 SNIA Public License 1.1 SNIA http://spdx.org/licenses/SNIA.html t -266 Spencer License 86 Spencer-86 http://spdx.org/licenses/Spencer-86.html t -267 Spencer License 94 Spencer-94 http://spdx.org/licenses/Spencer-94.html t -268 Spencer License 99 Spencer-99 http://spdx.org/licenses/Spencer-99.html t -269 Standard ML of New Jersey License SMLNJ http://spdx.org/licenses/SMLNJ.html t -270 SugarCRM Public License v1.1.3 SugarCRM-1.1.3 http://spdx.org/licenses/SugarCRM-1.1.3.html t -271 Sun Industry Standards Source License v1.1 SISSL http://spdx.org/licenses/SISSL.html t -272 Sun Industry Standards Source License v1.2 SISSL-1.2 http://spdx.org/licenses/SISSL-1.2.html t -273 Sun Public License v1.0 SPL-1.0 http://spdx.org/licenses/SPL-1.0.html t -274 Sybase Open Watcom Public License 1.0 Watcom-1.0 http://spdx.org/licenses/Watcom-1.0.html t -275 TCL/TK License TCL http://spdx.org/licenses/TCL.html t -276 The Unlicense Unlicense http://spdx.org/licenses/Unlicense.html t -277 TMate Open Source License TMate http://spdx.org/licenses/TMate.html t -278 TORQUE v2.5+ Software License v1.1 TORQUE-1.1 http://spdx.org/licenses/TORQUE-1.1.html t -279 Trusster Open Source License TOSL http://spdx.org/licenses/TOSL.html t -280 Unicode Terms of Use Unicode-TOU http://spdx.org/licenses/Unicode-TOU.html t -281 Universal Permissive License v1.0 UPL-1.0 http://spdx.org/licenses/UPL-1.0.html t -282 University of Illinois/NCSA Open Source License NCSA http://spdx.org/licenses/NCSA.html t -283 Vim License Vim http://spdx.org/licenses/Vim.html t -284 VOSTROM Public License for Open Source VOSTROM http://spdx.org/licenses/VOSTROM.html t -285 Vovida Software License v1.0 VSL-1.0 http://spdx.org/licenses/VSL-1.0.html t -286 W3C Software Notice and License (1998-07-20) W3C-19980720 http://spdx.org/licenses/W3C-19980720.html t -287 W3C Software Notice and License (2002-12-31) W3C 
http://spdx.org/licenses/W3C.html t -288 Wsuipa License Wsuipa http://spdx.org/licenses/Wsuipa.html t -289 X.Net License Xnet http://spdx.org/licenses/Xnet.html t -290 X11 License X11 http://spdx.org/licenses/X11.html t -291 Xerox License Xerox http://spdx.org/licenses/Xerox.html t -292 XFree86 License 1.1 XFree86-1.1 http://spdx.org/licenses/XFree86-1.1.html t -293 xinetd License xinetd http://spdx.org/licenses/xinetd.html t -294 XPP License xpp http://spdx.org/licenses/xpp.html t -295 XSkat License XSkat http://spdx.org/licenses/XSkat.html t -296 Yahoo! Public License v1.0 YPL-1.0 http://spdx.org/licenses/YPL-1.0.html t -297 Yahoo! Public License v1.1 YPL-1.1 http://spdx.org/licenses/YPL-1.1.html t -298 Zed License Zed http://spdx.org/licenses/Zed.html t -299 Zend License v2.0 Zend-2.0 http://spdx.org/licenses/Zend-2.0.html t -300 Zimbra Public License v1.3 Zimbra-1.3 http://spdx.org/licenses/Zimbra-1.3.html t -301 Zimbra Public License v1.4 Zimbra-1.4 http://spdx.org/licenses/Zimbra-1.4.html t -302 zlib License Zlib http://spdx.org/licenses/Zlib.html t -303 zlib/libpng License with Acknowledgement zlib-acknowledgement http://spdx.org/licenses/zlib-acknowledgement.html t -304 Zope Public License 1.1 ZPL-1.1 http://spdx.org/licenses/ZPL-1.1.html t -305 Zope Public License 2.0 ZPL-2.0 http://spdx.org/licenses/ZPL-2.0.html t -306 Zope Public License 2.1 ZPL-2.1 http://spdx.org/licenses/ZPL-2.1.html t -307 eCos license version 2.0 eCos-2.0 http://spdx.org/licenses/eCos-2.0 t -308 GNU General Public License v1.0 or later GPL-1.0+ http://spdx.org/licenses/GPL-1.0+ t -309 GNU General Public License v2.0 or later GPL-2.0+ http://spdx.org/licenses/GPL-2.0+ t -310 GNU General Public License v2.0 w/Autoconf exception GPL-2.0-with-autoconf-exception http://spdx.org/licenses/GPL-2.0-with-autoconf-exception t -311 GNU General Public License v2.0 w/Bison exception GPL-2.0-with-bison-exception http://spdx.org/licenses/GPL-2.0-with-bison-exception t -312 GNU General Public License v2.0 w/Classpath exception GPL-2.0-with-classpath-exception http://spdx.org/licenses/GPL-2.0-with-classpath-exception t -313 GNU General Public License v2.0 w/Font exception GPL-2.0-with-font-exception http://spdx.org/licenses/GPL-2.0-with-font-exception t -314 GNU General Public License v2.0 w/GCC Runtime Library exception GPL-2.0-with-GCC-exception http://spdx.org/licenses/GPL-2.0-with-GCC-exception t -315 GNU General Public License v3.0 or later GPL-3.0+ http://spdx.org/licenses/GPL-3.0+ t -316 GNU General Public License v3.0 w/Autoconf exception GPL-3.0-with-autoconf-exception http://spdx.org/licenses/GPL-3.0-with-autoconf-exception t -317 GNU General Public License v3.0 w/GCC Runtime Library exception GPL-3.0-with-GCC-exception http://spdx.org/licenses/GPL-3.0-with-GCC-exception t -318 GNU Lesser General Public License v2.1 or later LGPL-2.1+ http://spdx.org/licenses/LGPL-2.1+ t -319 GNU Lesser General Public License v3.0 or later LGPL-3.0+ http://spdx.org/licenses/LGPL-3.0+ t -320 GNU Library General Public License v2 or later LGPL-2.0+ http://spdx.org/licenses/LGPL-2.0+ t -321 Standard ML of New Jersey License StandardML-NJ http://spdx.org/licenses/StandardML-NJ t -322 wxWindows Library License WXwindows http://spdx.org/licenses/WXwindows t -\. 
-
-
---
--- Data for Name: packages; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.packages (package_id, name, version, file_name, supplier_id, originator_id, download_location, verification_code, ver_code_excluded_file_id, sha256, home_page, source_info, concluded_license_id, declared_license_id, license_comment, copyright_text, summary, description, comment, dosocs2_dir_code) FROM stdin;
-\.
-
-
---
--- Data for Name: packages_files; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.packages_files (package_file_id, package_id, file_id, concluded_license_id, license_comment, file_name) FROM stdin;
-\.
-
-
---
--- Data for Name: packages_scans; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.packages_scans (package_scan_id, package_id, scanner_id) FROM stdin;
-\.
-
-
---
--- Data for Name: projects; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.projects (package_id, name, homepage, uri) FROM stdin;
-\.
-
-
---
--- Data for Name: relationship_types; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.relationship_types (relationship_type_id, name) FROM stdin;
-1	DESCRIBES
-2	DESCRIBED_BY
-3	CONTAINS
-4	CONTAINED_BY
-5	GENERATES
-6	GENERATED_FROM
-7	ANCESTOR_OF
-8	DESCENDANT_OF
-9	VARIANT_OF
-10	DISTRIBUTION_ARTIFACT
-11	PATCH_FOR
-12	PATCH_APPLIED
-13	COPY_OF
-14	FILE_ADDED
-15	FILE_DELETED
-16	FILE_MODIFIED
-17	EXPANDED_FROM_ARCHIVE
-18	DYNAMIC_LINK
-19	STATIC_LINK
-20	DATA_FILE_OF
-21	TEST_CASE_OF
-22	BUILD_TOOL_OF
-23	DOCUMENTATION_OF
-24	OPTIONAL_COMPONENT_OF
-25	METAFILE_OF
-26	PACKAGE_OF
-27	AMENDS
-28	PREREQUISITE_FOR
-29	HAS_PREREQUISITE
-30	OTHER
-\.
-
-
---
--- Data for Name: relationships; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.relationships (relationship_id, left_identifier_id, right_identifier_id, relationship_type_id, relationship_comment) FROM stdin;
-\.
-
-
---
--- Data for Name: sbom_scans; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.sbom_scans (repo_id, sbom_scan) FROM stdin;
-\.
-
-
---
--- Data for Name: scanners; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.scanners (scanner_id, name) FROM stdin;
-\.
- - --- --- Name: augur_data.repo_insights_ri_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data."augur_data.repo_insights_ri_id_seq"', 25430, false); - - --- --- Name: chaoss_metric_status_cms_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.chaoss_metric_status_cms_id_seq', 1, false); - - --- --- Name: chaoss_user_chaoss_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.chaoss_user_chaoss_id_seq', 1, false); - - --- --- Name: commit_comment_ref_cmt_comment_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.commit_comment_ref_cmt_comment_id_seq', 25430, false); - - --- --- Name: commit_parents_parent_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.commit_parents_parent_id_seq', 25430, false); - - --- --- Name: commits_cmt_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.commits_cmt_id_seq', 25430, false); - - --- --- Name: contributor_affiliations_ca_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributor_affiliations_ca_id_seq', 25430, false); - - --- --- Name: contributor_repo_cntrb_repo_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributor_repo_cntrb_repo_id_seq', 1, false); - - --- --- Name: contributors_aliases_cntrb_a_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributors_aliases_cntrb_a_id_seq', 25430, false); - - --- --- Name: contributors_aliases_cntrb_alias_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributors_aliases_cntrb_alias_id_seq', 1, false); - - --- --- Name: contributors_cntrb_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributors_cntrb_id_seq', 25430, false); - - --- --- Name: contributors_history_cntrb_history_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributors_history_cntrb_history_id_seq', 25430, false); - - --- --- Name: discourse_insights_msg_discourse_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.discourse_insights_msg_discourse_id_seq', 1, false); - - --- --- Name: discourse_insights_msg_discourse_id_seq1; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.discourse_insights_msg_discourse_id_seq1', 1, false); - - --- --- Name: issue_assignees_issue_assignee_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.issue_assignees_issue_assignee_id_seq', 1, false); - - --- --- Name: issue_events_event_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.issue_events_event_id_seq', 25430, false); - - --- --- Name: issue_labels_issue_label_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.issue_labels_issue_label_id_seq', 25430, false); - - --- --- Name: issue_message_ref_issue_msg_ref_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT 
pg_catalog.setval('augur_data.issue_message_ref_issue_msg_ref_id_seq', 25430, false); - - --- --- Name: issue_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.issue_seq', 31000, false); - - --- --- Name: libraries_library_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.libraries_library_id_seq', 25430, false); - - --- --- Name: library_dependencies_lib_dependency_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.library_dependencies_lib_dependency_id_seq', 25430, false); - - --- --- Name: library_version_library_version_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.library_version_library_version_id_seq', 25430, false); - - --- --- Name: lstm_anomaly_models_model_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.lstm_anomaly_models_model_id_seq', 1, false); - - --- --- Name: lstm_anomaly_results_result_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.lstm_anomaly_results_result_id_seq', 1, false); - - --- --- Name: message_analysis_msg_analysis_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.message_analysis_msg_analysis_id_seq', 1, false); - - --- --- Name: message_analysis_summary_msg_summary_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.message_analysis_summary_msg_summary_id_seq', 1, false); - - --- --- Name: message_msg_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.message_msg_id_seq', 25430, false); - - --- --- Name: message_sentiment_msg_analysis_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.message_sentiment_msg_analysis_id_seq', 1, false); - - --- --- Name: message_sentiment_summary_msg_summary_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.message_sentiment_summary_msg_summary_id_seq', 1, false); - - --- --- Name: platform_pltfrm_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.platform_pltfrm_id_seq', 25430, false); - - --- --- Name: pull_request_analysis_pull_request_analysis_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_analysis_pull_request_analysis_id_seq', 1, false); - - --- --- Name: pull_request_assignees_pr_assignee_map_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_assignees_pr_assignee_map_id_seq', 25430, false); - - --- --- Name: pull_request_commits_pr_cmt_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_commits_pr_cmt_id_seq', 1, false); - - --- --- Name: pull_request_events_pr_event_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_events_pr_event_id_seq', 25430, false); - - --- --- Name: pull_request_files_pr_file_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_files_pr_file_id_seq', 25150, false); - - --- --- Name: 
pull_request_labels_pr_label_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_labels_pr_label_id_seq', 25430, false); - - --- --- Name: pull_request_message_ref_pr_msg_ref_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_message_ref_pr_msg_ref_id_seq', 25430, false); - - --- --- Name: pull_request_meta_pr_repo_meta_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_meta_pr_repo_meta_id_seq', 25430, false); - - --- --- Name: pull_request_repo_pr_repo_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_repo_pr_repo_id_seq', 25430, false); - - --- --- Name: pull_request_review_message_ref_pr_review_msg_ref_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_review_message_ref_pr_review_msg_ref_id_seq', 1, false); - - --- --- Name: pull_request_reviewers_pr_reviewer_map_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_reviewers_pr_reviewer_map_id_seq', 25430, false); - - --- --- Name: pull_request_reviews_pr_review_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_reviews_pr_review_id_seq', 1, false); - - --- --- Name: pull_request_teams_pr_team_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_teams_pr_team_id_seq', 25430, false); - - --- --- Name: pull_requests_pull_request_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_requests_pull_request_id_seq', 25430, false); - - --- --- Name: releases_release_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.releases_release_id_seq', 1, false); - - --- --- Name: repo_badging_badge_collection_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_badging_badge_collection_id_seq', 25012, false); - - --- --- Name: repo_cluster_messages_msg_cluster_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_cluster_messages_msg_cluster_id_seq', 1, false); - - --- --- Name: repo_dependencies_repo_dependencies_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_dependencies_repo_dependencies_id_seq', 1, false); - - --- --- Name: repo_deps_libyear_repo_deps_libyear_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_deps_libyear_repo_deps_libyear_id_seq', 1, false); - - --- --- Name: repo_deps_scorecard_repo_deps_scorecard_id_seq1; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_deps_scorecard_repo_deps_scorecard_id_seq1', 1, false); - - --- --- Name: repo_group_insights_rgi_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_group_insights_rgi_id_seq', 25430, false); - - --- --- Name: repo_groups_list_serve_rgls_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_groups_list_serve_rgls_id_seq', 25430, false); - - --- --- Name: 
repo_groups_repo_group_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_groups_repo_group_id_seq', 25430, false); - - --- --- Name: repo_info_repo_info_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_info_repo_info_id_seq', 25430, false); - - --- --- Name: repo_insights_records_ri_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_insights_records_ri_id_seq', 1, false); - - --- --- Name: repo_insights_ri_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_insights_ri_id_seq', 1, false); - - --- --- Name: repo_labor_repo_labor_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_labor_repo_labor_id_seq', 25430, false); - - --- --- Name: repo_meta_rmeta_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_meta_rmeta_id_seq', 25430, false); - - --- --- Name: repo_repo_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_repo_id_seq', 25430, false); - - --- --- Name: repo_sbom_scans_rsb_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_sbom_scans_rsb_id_seq', 25430, false); - - --- --- Name: repo_stats_rstat_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_stats_rstat_id_seq', 25430, false); - - --- --- Name: repo_test_coverage_repo_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_test_coverage_repo_id_seq', 1, false); - - --- --- Name: repo_topic_repo_topic_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_topic_repo_topic_id_seq', 1, false); - - --- --- Name: topic_words_topic_words_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.topic_words_topic_words_id_seq', 1, false); - - --- --- Name: unresolved_commit_emails_email_unresolved_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.unresolved_commit_emails_email_unresolved_id_seq', 1, false); - - --- --- Name: utility_log_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.utility_log_id_seq', 1, false); - - --- --- Name: utility_log_id_seq1; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.utility_log_id_seq1', 1, false); - - --- --- Name: affiliations_corp_id_seq; Type: SEQUENCE SET; Schema: augur_operations; Owner: augur --- - -SELECT pg_catalog.setval('augur_operations.affiliations_corp_id_seq', 620000, false); - - --- --- Name: augur_settings_id_seq; Type: SEQUENCE SET; Schema: augur_operations; Owner: augur --- - -SELECT pg_catalog.setval('augur_operations.augur_settings_id_seq', 1, false); - - --- --- Name: config_id_seq; Type: SEQUENCE SET; Schema: augur_operations; Owner: augur --- - -SELECT pg_catalog.setval('augur_operations.config_id_seq', 35, true); - - --- --- Name: gh_worker_history_history_id_seq; Type: SEQUENCE SET; Schema: augur_operations; Owner: augur --- - -SELECT pg_catalog.setval('augur_operations.gh_worker_history_history_id_seq', 15000, false); - - --- --- Name: 
[Removed pg_dump schema DDL: SEQUENCE SET (pg_catalog.setval) statements for the augur_operations and spdx sequences; PRIMARY KEY and UNIQUE constraints, with their COMMENT ON CONSTRAINT notes, for tables in the augur_data, augur_operations, spdx, and public (alembic_version) schemas; btree, hash, brin, and unique indexes on augur_data and augur_operations tables; and FOREIGN KEY constraints on the augur_data tables (contributor_repo, contributors_aliases, commits, commit_comment_ref, commit_parents, issues and issue_* tables, message and message_* tables, libraries, lstm_anomaly_*, pull_requests and pull_request_* tables, releases, and related reference tables).]
-ALTER TABLE ONLY augur_data.releases - ADD CONSTRAINT fk_releases_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_badging fk_repo_badging_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_badging - ADD CONSTRAINT fk_repo_badging_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_cluster_messages fk_repo_cluster_messages_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_cluster_messages - ADD CONSTRAINT fk_repo_cluster_messages_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_group_insights fk_repo_group_insights_repo_groups_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_group_insights - ADD CONSTRAINT fk_repo_group_insights_repo_groups_1 FOREIGN KEY (repo_group_id) REFERENCES augur_data.repo_groups(repo_group_id); - - --- --- Name: repo_groups_list_serve fk_repo_groups_list_serve_repo_groups_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_groups_list_serve - ADD CONSTRAINT fk_repo_groups_list_serve_repo_groups_1 FOREIGN KEY (repo_group_id) REFERENCES augur_data.repo_groups(repo_group_id); - - --- --- Name: issue_message_ref fk_repo_id_fk1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.issue_message_ref - ADD CONSTRAINT fk_repo_id_fk1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id) ON UPDATE CASCADE ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED; - - --- --- Name: repo_info fk_repo_info_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_info - ADD CONSTRAINT fk_repo_info_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_insights fk_repo_insights_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_insights - ADD CONSTRAINT fk_repo_insights_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_labor fk_repo_labor_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_labor - ADD CONSTRAINT fk_repo_labor_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_meta fk_repo_meta_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_meta - ADD CONSTRAINT fk_repo_meta_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo fk_repo_repo_groups_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo - ADD CONSTRAINT fk_repo_repo_groups_1 FOREIGN KEY (repo_group_id) REFERENCES augur_data.repo_groups(repo_group_id); - - --- --- Name: CONSTRAINT fk_repo_repo_groups_1 ON repo; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON CONSTRAINT fk_repo_repo_groups_1 ON augur_data.repo IS 'Repo_groups cardinality set to one and only one because, although in theory there could be more than one repo group for a repo, this might create dependencies in hosted situation that we do not want to live with. 
'; - - --- --- Name: pull_request_reviews fk_repo_review; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_reviews - ADD CONSTRAINT fk_repo_review FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id) ON UPDATE CASCADE ON DELETE RESTRICT; - - --- --- Name: repo_stats fk_repo_stats_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_stats - ADD CONSTRAINT fk_repo_stats_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_test_coverage fk_repo_test_coverage_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_test_coverage - ADD CONSTRAINT fk_repo_test_coverage_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_topic fk_repo_topic_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_topic - ADD CONSTRAINT fk_repo_topic_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: pull_request_review_message_ref fk_review_repo; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_review_message_ref - ADD CONSTRAINT fk_review_repo FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id) ON UPDATE CASCADE ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED; - - --- --- Name: pull_request_events fkpr_platform; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_events - ADD CONSTRAINT fkpr_platform FOREIGN KEY (platform_id) REFERENCES augur_data.platform(pltfrm_id) ON UPDATE RESTRICT ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED; - - --- --- Name: pull_request_events fkprevent_repo_id; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_events - ADD CONSTRAINT fkprevent_repo_id FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id) ON UPDATE RESTRICT ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED; - - --- --- Name: issue_assignees issue_assignees_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.issue_assignees - ADD CONSTRAINT issue_assignees_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: issue_events issue_events_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.issue_events - ADD CONSTRAINT issue_events_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE RESTRICT; - - --- --- Name: issues issues_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.issues - ADD CONSTRAINT issues_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: issues issues_reporter_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.issues - ADD CONSTRAINT issues_reporter_id_fkey FOREIGN KEY (reporter_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: message message_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.message - ADD CONSTRAINT message_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE CASCADE; - - --- --- Name: pull_request_assignees pull_request_assignees_contrib_id_fkey; 
Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_assignees - ADD CONSTRAINT pull_request_assignees_contrib_id_fkey FOREIGN KEY (contrib_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: pull_request_commits pull_request_commits_pr_cmt_author_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_commits - ADD CONSTRAINT pull_request_commits_pr_cmt_author_cntrb_id_fkey FOREIGN KEY (pr_cmt_author_cntrb_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE CASCADE; - - --- --- Name: pull_request_events pull_request_events_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_events - ADD CONSTRAINT pull_request_events_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: pull_request_meta pull_request_meta_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_meta - ADD CONSTRAINT pull_request_meta_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: pull_request_repo pull_request_repo_pr_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_repo - ADD CONSTRAINT pull_request_repo_pr_cntrb_id_fkey FOREIGN KEY (pr_cntrb_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: pull_request_reviewers pull_request_reviewers_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_reviewers - ADD CONSTRAINT pull_request_reviewers_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE CASCADE; - - --- --- Name: pull_request_reviews pull_request_reviews_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_reviews - ADD CONSTRAINT pull_request_reviews_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE RESTRICT; - - --- --- Name: pull_requests pull_requests_pr_augur_contributor_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_requests - ADD CONSTRAINT pull_requests_pr_augur_contributor_id_fkey FOREIGN KEY (pr_augur_contributor_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE RESTRICT; - - --- --- Name: repo_dependencies repo_id; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_dependencies - ADD CONSTRAINT repo_id FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_deps_scorecard repo_id_copy_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_deps_scorecard - ADD CONSTRAINT repo_id_copy_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_deps_libyear repo_id_copy_2; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_deps_libyear - ADD CONSTRAINT repo_id_copy_2 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_insights_records repo_id_ref; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_insights_records - ADD CONSTRAINT repo_id_ref FOREIGN KEY (repo_id) REFERENCES 
augur_data.repo(repo_id) ON UPDATE CASCADE ON DELETE SET NULL; - - --- --- Name: repo_sbom_scans repo_linker_sbom; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_sbom_scans - ADD CONSTRAINT repo_linker_sbom FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id) ON UPDATE CASCADE ON DELETE CASCADE; - - --- --- Name: annotations annotations_annotation_type_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.annotations - ADD CONSTRAINT annotations_annotation_type_id_fkey FOREIGN KEY (annotation_type_id) REFERENCES spdx.annotation_types(annotation_type_id); - - --- --- Name: annotations annotations_creator_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.annotations - ADD CONSTRAINT annotations_creator_id_fkey FOREIGN KEY (creator_id) REFERENCES spdx.creators(creator_id); - - --- --- Name: annotations annotations_document_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.annotations - ADD CONSTRAINT annotations_document_id_fkey FOREIGN KEY (document_id) REFERENCES spdx.documents(document_id); - - --- --- Name: annotations annotations_identifier_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.annotations - ADD CONSTRAINT annotations_identifier_id_fkey FOREIGN KEY (identifier_id) REFERENCES spdx.identifiers(identifier_id); - - --- --- Name: creators creators_creator_type_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.creators - ADD CONSTRAINT creators_creator_type_id_fkey FOREIGN KEY (creator_type_id) REFERENCES spdx.creator_types(creator_type_id); - - --- --- Name: documents_creators documents_creators_creator_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.documents_creators - ADD CONSTRAINT documents_creators_creator_id_fkey FOREIGN KEY (creator_id) REFERENCES spdx.creators(creator_id); - - --- --- Name: documents_creators documents_creators_document_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.documents_creators - ADD CONSTRAINT documents_creators_document_id_fkey FOREIGN KEY (document_id) REFERENCES spdx.documents(document_id); - - --- --- Name: documents documents_data_license_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.documents - ADD CONSTRAINT documents_data_license_id_fkey FOREIGN KEY (data_license_id) REFERENCES spdx.licenses(license_id); - - --- --- Name: documents documents_document_namespace_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.documents - ADD CONSTRAINT documents_document_namespace_id_fkey FOREIGN KEY (document_namespace_id) REFERENCES spdx.document_namespaces(document_namespace_id); - - --- --- Name: documents documents_package_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.documents - ADD CONSTRAINT documents_package_id_fkey FOREIGN KEY (package_id) REFERENCES spdx.packages(package_id); - - --- --- Name: external_refs external_refs_document_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.external_refs - ADD CONSTRAINT external_refs_document_id_fkey FOREIGN KEY (document_id) REFERENCES spdx.documents(document_id); - - --- --- Name: external_refs external_refs_document_namespace_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.external_refs - ADD CONSTRAINT 
external_refs_document_namespace_id_fkey FOREIGN KEY (document_namespace_id) REFERENCES spdx.document_namespaces(document_namespace_id); - - --- --- Name: file_contributors file_contributors_file_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.file_contributors - ADD CONSTRAINT file_contributors_file_id_fkey FOREIGN KEY (file_id) REFERENCES spdx.files(file_id); - - --- --- Name: files_licenses files_licenses_file_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.files_licenses - ADD CONSTRAINT files_licenses_file_id_fkey FOREIGN KEY (file_id) REFERENCES spdx.files(file_id); - - --- --- Name: files_licenses files_licenses_license_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.files_licenses - ADD CONSTRAINT files_licenses_license_id_fkey FOREIGN KEY (license_id) REFERENCES spdx.licenses(license_id); - - --- --- Name: files_scans files_scans_file_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.files_scans - ADD CONSTRAINT files_scans_file_id_fkey FOREIGN KEY (file_id) REFERENCES spdx.files(file_id); - - --- --- Name: files_scans files_scans_scanner_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.files_scans - ADD CONSTRAINT files_scans_scanner_id_fkey FOREIGN KEY (scanner_id) REFERENCES spdx.scanners(scanner_id); - - --- --- Name: packages_files fk_package_files_packages; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages_files - ADD CONSTRAINT fk_package_files_packages FOREIGN KEY (package_id) REFERENCES spdx.packages(package_id); - - --- --- Name: packages fk_package_packages_files; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages - ADD CONSTRAINT fk_package_packages_files FOREIGN KEY (ver_code_excluded_file_id) REFERENCES spdx.packages_files(package_file_id); - - --- --- Name: identifiers identifiers_document_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.identifiers - ADD CONSTRAINT identifiers_document_id_fkey FOREIGN KEY (document_id) REFERENCES spdx.documents(document_id); - - --- --- Name: identifiers identifiers_document_namespace_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.identifiers - ADD CONSTRAINT identifiers_document_namespace_id_fkey FOREIGN KEY (document_namespace_id) REFERENCES spdx.document_namespaces(document_namespace_id); - - --- --- Name: identifiers identifiers_package_file_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.identifiers - ADD CONSTRAINT identifiers_package_file_id_fkey FOREIGN KEY (package_file_id) REFERENCES spdx.packages_files(package_file_id); - - --- --- Name: identifiers identifiers_package_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.identifiers - ADD CONSTRAINT identifiers_package_id_fkey FOREIGN KEY (package_id) REFERENCES spdx.packages(package_id); - - --- --- Name: packages packages_concluded_license_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages - ADD CONSTRAINT packages_concluded_license_id_fkey FOREIGN KEY (concluded_license_id) REFERENCES spdx.licenses(license_id); - - --- --- Name: packages packages_declared_license_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages - ADD CONSTRAINT packages_declared_license_id_fkey FOREIGN KEY 
(declared_license_id) REFERENCES spdx.licenses(license_id); - - --- --- Name: packages_files packages_files_concluded_license_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages_files - ADD CONSTRAINT packages_files_concluded_license_id_fkey FOREIGN KEY (concluded_license_id) REFERENCES spdx.licenses(license_id); - - --- --- Name: packages_files packages_files_file_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages_files - ADD CONSTRAINT packages_files_file_id_fkey FOREIGN KEY (file_id) REFERENCES spdx.files(file_id); - - --- --- Name: packages packages_originator_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages - ADD CONSTRAINT packages_originator_id_fkey FOREIGN KEY (originator_id) REFERENCES spdx.creators(creator_id); - - --- --- Name: packages_scans packages_scans_package_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages_scans - ADD CONSTRAINT packages_scans_package_id_fkey FOREIGN KEY (package_id) REFERENCES spdx.packages(package_id); - - --- --- Name: packages_scans packages_scans_scanner_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages_scans - ADD CONSTRAINT packages_scans_scanner_id_fkey FOREIGN KEY (scanner_id) REFERENCES spdx.scanners(scanner_id); - - --- --- Name: packages packages_supplier_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages - ADD CONSTRAINT packages_supplier_id_fkey FOREIGN KEY (supplier_id) REFERENCES spdx.creators(creator_id); - - --- --- Name: relationships relationships_left_identifier_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.relationships - ADD CONSTRAINT relationships_left_identifier_id_fkey FOREIGN KEY (left_identifier_id) REFERENCES spdx.identifiers(identifier_id); - - --- --- Name: relationships relationships_relationship_type_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.relationships - ADD CONSTRAINT relationships_relationship_type_id_fkey FOREIGN KEY (relationship_type_id) REFERENCES spdx.relationship_types(relationship_type_id); - - --- --- Name: relationships relationships_right_identifier_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.relationships - ADD CONSTRAINT relationships_right_identifier_id_fkey FOREIGN KEY (right_identifier_id) REFERENCES spdx.identifiers(identifier_id); - - --- --- PostgreSQL database dump complete --- - From d0da0318fb77d3c408c631084fe8d88fa175b8c4 Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Sun, 11 Jan 2026 15:41:45 -0500 Subject: [PATCH 099/104] add timeout value for the job Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- .github/workflows/functional_test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/functional_test.yml b/.github/workflows/functional_test.yml index 544029df0e..eaa50adf30 100644 --- a/.github/workflows/functional_test.yml +++ b/.github/workflows/functional_test.yml @@ -11,6 +11,7 @@ jobs: test: name: test with ${{ matrix.env }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} + timeout-minutes: 15 strategy: fail-fast: false matrix: From c43b2f39a4ae3ab68c0ee2925024eff5e8b26814 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Dec 2025 11:01:32 -0500 Subject: [PATCH 100/104] First draft of new database 
 table for repo_aliases

Signed-off-by: Adrian Edwards
---
 augur/application/db/models/__init__.py | 1 +
 augur/application/db/models/augur_data.py | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/augur/application/db/models/__init__.py b/augur/application/db/models/__init__.py
index f729f0ac1e..06ca9cb910 100644
--- a/augur/application/db/models/__init__.py
+++ b/augur/application/db/models/__init__.py
@@ -14,6 +14,7 @@
     ContributorRepo,
     ContributorsAlias,
     Repo,
+    HistoricalRepoURLs,
     RepoTestCoverage,
     RepoGroupInsight,
     RepoGroupsListServe,
diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py
index 9f7d8c7fb3..78c2ce7151 100644
--- a/augur/application/db/models/augur_data.py
+++ b/augur/application/db/models/augur_data.py
@@ -5,6 +5,7 @@
     CHAR,
     Column,
     Date,
+    DateTime,
     Float,
     ForeignKey,
     Index,
@@ -17,6 +18,7 @@
     Text,
     UniqueConstraint,
     text,
+    func
 )
 from sqlalchemy.dialects.postgresql import JSONB, TIMESTAMP, UUID
 from sqlalchemy.orm import relationship
@@ -1166,6 +1168,23 @@ def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_
 
+class HistoricalRepoURLs(Base):
+    """ A table for storing previously-used git URLs for a repository
+    This is used to enable lookups that resolve historical URLs to the repo_id for a given repository
+    When a repo is detected as moved and its url is updated in the repo table, the old URL gets added to this table.
+
+    The date_collected field allows for history of a repo with multiple URL changes to be inferred,
+    for example, when an old url is moved to this table, its date serves as both an end date
+    for the previous old url, and as the start date for the one that was just moved.
+    The currently-valid URL remains in the repo table and is not moved here until it has been superseded.
+    """
+
+    __tablename__ = "historical_repo_urls"
+    __table_args__ = {"schema": "augur_data"}
+
+    repo_id = Column(ForeignKey("augur_data.repo.repo_id"), primary_key=True)
+    git_url = Column(String, primary_key=True)
+    date_collected = Column(DateTime(timezone=True), server_default=func.now(), nullable=True)
+
 
 class RepoTestCoverage(Base):
     __tablename__ = "repo_test_coverage"

From 4c9244a3859bfe03dff3f80c74ad33eebfb5819d Mon Sep 17 00:00:00 2001
From: Adrian Edwards
Date: Tue, 2 Dec 2025 14:11:56 -0500
Subject: [PATCH 101/104] add code in update_repo_with_dict that adds values to the new repo_aliases table

Signed-off-by: Adrian Edwards
---
 augur/tasks/github/detect_move/core.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py
index 6b47df1a32..2ad96de671 100644
--- a/augur/tasks/github/detect_move/core.py
+++ b/augur/tasks/github/detect_move/core.py
@@ -7,6 +7,8 @@
 from augur.tasks.util.collection_state import CollectionState
 from augur.application.db.util import execute_session_query
 from augur.application.db.lib import bulk_insert_dicts
+from augur.application.db.models import HistoricalRepoURLs
+from sqlalchemy.exc import IntegrityError
 
 
 class RepoMovedException(Exception):
@@ -29,12 +31,24 @@ def update_repo_with_dict(repo,new_dict,logger):
     """
     to_insert = dict(repo.__dict__)
    del to_insert['_sa_instance_state']
+
+    old_url = to_insert["repo_git"]
+    repo_id = to_insert["repo_id"]
+
+    with DatabaseSession(logger) as session:
+        previous_alias = HistoricalRepoURLs(repo_id=repo_id, git_url=old_url)
+        try:
+            result = session.add(previous_alias)
+            session.commit()
+        except IntegrityError as e: #Unique violation
+            session.rollback()
+
     to_insert.update(new_dict)
 
     result = bulk_insert_dicts(logger, to_insert, Repo, ['repo_id'])
 
     url = to_insert['repo_git']
-    logger.info(f"Updated repo for {url}\n")
+    logger.info(f"Updated repo {old_url} to {url} and set alias\n")

From 1e7a26d8eff053c6ff8b90c574998022bdf596ee Mon Sep 17 00:00:00 2001
From: Adrian Edwards
Date: Mon, 15 Dec 2025 16:49:35 -0500
Subject: [PATCH 102/104] seems like retry needs a value passed into it.

Signed-off-by: Adrian Edwards
---
 augur/tasks/github/detect_move/core.py | 9 ++++++---
 augur/tasks/github/detect_move/tasks.py | 5 ++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py
index 2ad96de671..a3eb3803d6 100644
--- a/augur/tasks/github/detect_move/core.py
+++ b/augur/tasks/github/detect_move/core.py
@@ -12,7 +12,9 @@
 
 class RepoMovedException(Exception):
-    pass
+    def __init__(self, message, new_url=None):
+        super().__init__(message)
+        self.new_url = new_url
 
 
 class RepoGoneException(Exception):
     pass
@@ -49,6 +51,7 @@ def update_repo_with_dict(repo,new_dict,logger):
 
     url = to_insert['repo_git']
     logger.info(f"Updated repo {old_url} to {url} and set alias\n")
+    return url
 
 
@@ -104,9 +107,9 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c
             'description': f"(Originally hosted at {url}) {old_description}"
         }
 
-        update_repo_with_dict(repo, repo_update_dict, logger)
+        new_url = update_repo_with_dict(repo, repo_update_dict, logger)
 
-        raise RepoMovedException("ERROR: Repo has moved! Resetting Collection!")
+        raise RepoMovedException("ERROR: Repo has moved! Resetting Collection!", new_url=new_url)
 
     #Mark as ignore if 404
     if response_from_gh.status_code == 404:
diff --git a/augur/tasks/github/detect_move/tasks.py b/augur/tasks/github/detect_move/tasks.py
index 6f7b04b8de..249ff1a0de 100644
--- a/augur/tasks/github/detect_move/tasks.py
+++ b/augur/tasks/github/detect_move/tasks.py
@@ -29,7 +29,10 @@ def detect_github_repo_move_core(repo_git : str) -> None:
         try:
             ping_github_for_repo_move(session, key_auth, repo, logger)
         except RepoMovedException as e:
-            raise Retry(e)
+            if e.new_url is not None:
+                raise Retry(e.new_url)
+            else:
+                raise Reject(e)
         except RepoGoneException as e:
             raise Reject(e)

From e05bcce8d47625c1a882b1943ded62c739e9a31f Mon Sep 17 00:00:00 2001
From: Adrian Edwards
Date: Mon, 15 Dec 2025 15:53:57 -0500
Subject: [PATCH 103/104] Add migration for new table

Signed-off-by: Adrian Edwards
---
 .../38_add_historical_repo_urls_table.py | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 augur/application/schema/alembic/versions/38_add_historical_repo_urls_table.py

diff --git a/augur/application/schema/alembic/versions/38_add_historical_repo_urls_table.py b/augur/application/schema/alembic/versions/38_add_historical_repo_urls_table.py
new file mode 100644
index 0000000000..dda3c17188
--- /dev/null
+++ b/augur/application/schema/alembic/versions/38_add_historical_repo_urls_table.py
@@ -0,0 +1,35 @@
+"""add historical repo urls table
+
+Revision ID: 38
+Revises: 37
+Create Date: 2025-12-15 15:50:31.819780
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '38'
+down_revision = '37'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('historical_repo_urls',
+    sa.Column('repo_id', sa.BigInteger(), nullable=False),
+    sa.Column('git_url', sa.String(), nullable=False),
+    sa.Column('date_collected', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
+    sa.ForeignKeyConstraint(['repo_id'], ['augur_data.repo.repo_id'], ),
+    sa.PrimaryKeyConstraint('repo_id', 'git_url'),
+    schema='augur_data'
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('historical_repo_urls', schema='augur_data')
+    # ### end Alembic commands ###

From 0592017b6fa26ef46ffb678cf0eeccd777f63d11 Mon Sep 17 00:00:00 2001
From: "Sean P. Goggins"
Date: Tue, 20 Jan 2026 18:19:54 -0600
Subject: [PATCH 104/104] updated metadata

Signed-off-by: Sean P. Goggins
---
 README.md | 6 +++---
 metadata.py | 8 +++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index e59180de0c..0a0ab7deab 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Augur NEW Release v0.91.0
+# Augur NEW Release v0.92.0
 
 Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data - less data carpentry for everyone else! The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot), a public instance of 8Knot is available [here](https://metrix.chaoss.io) - this is tied to a public instance of [Augur](https://ai.chaoss.io).
 
 We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy o
 
 ## NEW RELEASE ALERT!
 
 **If you want to jump right in, the updated docker, docker-compose and bare metal installation instructions are available [here](docs/new-install.md)**.
 
-Augur is now releasing a dramatically improved new version. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.91.0).
+Augur is now releasing a dramatically improved new version. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.92.0).
 
 - The `release` branch is a stable version of our new architecture, which features:
@@ -83,7 +83,7 @@ We strongly believe that much of what makes open source so great is the incredib
 
 ## License, Copyright, and Funding
 
-Copyright © 2025 University of Nebraska at Omaha, University of Missouri, Brian Warner, and the CHAOSS Project.
+Copyright © 2025 University of Missouri, Sean Goggins, and Derek Howard.
 
 Augur is free software: you can redistribute it and/or modify it under the terms of the MIT License as published by the Open Source Initiative. See the [LICENSE](LICENSE) file for more details.
 
diff --git a/metadata.py b/metadata.py
index cf6893b2f8..71827630af 100644
--- a/metadata.py
+++ b/metadata.py
@@ -5,8 +5,10 @@
 __short_description__ = "Python 3 package for free/libre and open-source software community metrics, models & data collection"
 
-__version__ = "0.91.0"
-__release__ = "v0.91.0 (What's Up Augur? We are Software!)"
+__version__ = "0.92.0"
+__release__ = "v0.92.0 (Paladin Penguin)"
+__author__ = "Augur Team"
+__author_email__ = "outdoors@acm.org"
 __license__ = "MIT"
-__copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Derek Howard, Sean Goggins, Brian Warner & Augurlabs 2025, Red Hat Software"
+__copyright__ = "University of Missouri, Derek Howard, Sean Goggins, Augurlabs 2025"
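Illustrative sketch (not part of the patch series above): the HistoricalRepoURLs docstring in PATCH 100 describes lookups that resolve a previously-used git URL back to its repo_id. A minimal version of such a lookup might look like the following, assuming the existing Repo model and the DatabaseSession helper seen in PATCH 101 are importable; the DatabaseSession import path and the resolve_repo_id helper name are assumptions, not code from these patches.

from augur.application.db.models import Repo, HistoricalRepoURLs
from augur.application.db.session import DatabaseSession  # assumed import path

def resolve_repo_id(logger, git_url: str):
    """Map a git URL to a repo_id: check the current URL first, then historical ones."""
    with DatabaseSession(logger) as session:
        # The repo table always holds the currently-valid repo_git.
        repo = session.query(Repo).filter(Repo.repo_git == git_url).first()
        if repo is not None:
            return repo.repo_id

        # detect_move inserts a superseded URL here when a repo is detected as moved.
        alias = (
            session.query(HistoricalRepoURLs)
            .filter(HistoricalRepoURLs.git_url == git_url)
            .first()
        )
        return alias.repo_id if alias is not None else None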
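A second hedged sketch: the same docstring notes that date_collected lets the URL history of a repo with multiple moves be inferred, since each alias row's timestamp is both the end date of that URL and the start date of its successor. Reusing the imports above, a reconstruction of that timeline could look like this (url_history is likewise a hypothetical helper, not part of the patches):

def url_history(logger, repo_id: int):
    """Return (git_url, end_date) pairs oldest-first; the current URL has no end date."""
    with DatabaseSession(logger) as session:
        aliases = (
            session.query(HistoricalRepoURLs)
            .filter(HistoricalRepoURLs.repo_id == repo_id)
            .order_by(HistoricalRepoURLs.date_collected)
            .all()
        )
        # Each date_collected marks when that URL stopped being current, which is also
        # when the next URL in the sequence took effect.
        history = [(alias.git_url, alias.date_collected) for alias in aliases]

        repo = session.query(Repo).filter(Repo.repo_id == repo_id).first()
        if repo is not None:
            history.append((repo.repo_git, None))  # still current, no end date yet
        return history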