From 99fc30cf5e9e08cfb8464696e3a400a331e61d9d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 24 Oct 2025 10:45:06 -0400 Subject: [PATCH 001/104] remove unused functions Signed-off-by: Adrian Edwards --- augur/tasks/util/collection_util.py | 95 ----------------------------- 1 file changed, 95 deletions(-) diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index 28489d63c8..66958e2fe9 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -18,101 +18,6 @@ from augur.tasks.util.collection_state import CollectionState -def get_list_of_all_users(): - #Get a list of all users. - query = s.sql.text(""" - SELECT - user_id - FROM augur_operations.users - """) - - users = execute_sql(query).fetchall() - return users - - -def get_required_conditions_for_core_repos(allow_collected_before = False, days_until_collect_again = 1): - - if not allow_collected_before: - condition_concat_string = f""" - core_status='{str(CollectionState.PENDING.value)}' AND core_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.core_data_last_collected IS NULL - AND core_status!='{str(CollectionState.COLLECTING.value)}' - """ - else: - condition_concat_string = f""" - core_status='Success' AND core_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.core_data_last_collected IS NOT NULL - AND core_status!='{str(CollectionState.COLLECTING.value)}' - AND core_data_last_collected <= NOW() - INTERVAL '{days_until_collect_again} DAYS' - """ - - return condition_concat_string - -def get_required_conditions_for_secondary_repos(allow_collected_before = False, days_until_collect_again = 1): - - if not allow_collected_before: - condition_concat_string = f""" - secondary_status='{str(CollectionState.PENDING.value)}' AND secondary_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.core_status = '{str(CollectionState.SUCCESS.value)}' - AND augur_operations.collection_status.secondary_data_last_collected IS NULL - AND secondary_status!='{str(CollectionState.COLLECTING.value)}' - """ - else: - condition_concat_string = f""" - secondary_status='Success' AND secondary_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.secondary_data_last_collected IS NOT NULL - AND augur_operations.collection_status.core_status = '{str(CollectionState.SUCCESS.value)}' - AND secondary_status!='{str(CollectionState.COLLECTING.value)}' - AND secondary_data_last_collected <= NOW() - INTERVAL '{days_until_collect_again} DAYS' - """ - - return condition_concat_string - -def get_required_conditions_for_facade_repos(allow_collected_before = False, days_until_collect_again = 1): - - if not allow_collected_before: - condition_concat_string = f""" - facade_status='{str(CollectionState.UPDATE.value)}' AND facade_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.facade_status != '{str(CollectionState.PENDING.value)}' - AND augur_operations.collection_status.facade_status != '{str(CollectionState.FAILED_CLONE.value)}' - AND augur_operations.collection_status.facade_status != '{str(CollectionState.INITIALIZING.value)}' - AND augur_operations.collection_status.facade_data_last_collected IS NULL - AND facade_status!='{str(CollectionState.COLLECTING.value)}' - """ - else: - condition_concat_string = f""" - facade_status='Success' AND facade_status!='{str(CollectionState.ERROR.value)}' - AND 
augur_operations.collection_status.facade_data_last_collected IS NOT NULL - AND augur_operations.collection_status.facade_status != '{str(CollectionState.PENDING.value)}' - AND augur_operations.collection_status.facade_status != '{str(CollectionState.FAILED_CLONE.value)}' - AND augur_operations.collection_status.facade_status != '{str(CollectionState.INITIALIZING.value)}' - AND facade_status!='{str(CollectionState.COLLECTING.value)}' - AND facade_data_last_collected <= NOW() - INTERVAL '{days_until_collect_again} DAYS' - """ - - return condition_concat_string - -def get_required_conditions_for_ml_repos(allow_collected_before = False, days_until_collect_again = 1): - - if not allow_collected_before: - condition_concat_string = f""" - ml_status='{str(CollectionState.PENDING.value)}' AND ml_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.secondary_status = '{str(CollectionState.SUCCESS.value)}' - AND augur_operations.collection_status.ml_data_last_collected IS NULL - AND ml_status!='{str(CollectionState.COLLECTING.value)}' - """ - else: - condition_concat_string = f""" - ml_status='Success' AND ml_status!='{str(CollectionState.ERROR.value)}' - AND augur_operations.collection_status.ml_data_last_collected IS NOT NULL - AND ml_status!='{str(CollectionState.COLLECTING.value)}' - AND ml_data_last_collected <= NOW() - INTERVAL '{days_until_collect_again} DAYS' - """ - - return condition_concat_string - - - class CollectionRequest: def __init__(self,name,phases,max_repo = 10,days_until_collect_again = 1, gitlab_phases=None): self.name = name From e080f6112a91a00be1a49d68b8207930a7bfdc4d Mon Sep 17 00:00:00 2001 From: Isaac Milarsky Date: Tue, 28 Oct 2025 18:01:15 -0500 Subject: [PATCH 002/104] add toggle option for messages Signed-off-by: Isaac Milarsky --- augur/application/config.py | 3 ++- augur/tasks/git/facade_tasks.py | 5 +++-- augur/tasks/git/util/facade_worker/facade_worker/config.py | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index 2cc6f65cdb..776bce63d1 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -52,7 +52,8 @@ def get_development_flag(): "rebuild_caches": 1, "run_analysis": 1, "run_facade_contributors": 1, - "facade_contributor_full_recollect": 0 + "facade_contributor_full_recollect": 0, + "commit_messages": 1, }, "Server": { "cache_expire": "3600", diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index 5baaed20d4..826fc023e1 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -315,7 +315,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: facade_bulk_insert_commits(logger, pendingCommitRecordsToInsert) pendingCommitRecordsToInsert = [] - if commit_msg: + if commit_msg and facade_helper.commit_messages: pendingCommitMessageRecordsToInsert.append(commit_msg) if len(pendingCommitMessageRecordsToInsert) >= 1000: @@ -446,7 +446,8 @@ def generate_analysis_sequence(logger,repo_git, facade_helper): analysis_sequence.append(trim_commits_post_analysis_facade_task.si(repo_git)) - analysis_sequence.append(facade_fetch_missing_commit_messages.si(repo_git)) + if facade_helper.commit_messages: + analysis_sequence.append(facade_fetch_missing_commit_messages.si(repo_git)) analysis_sequence.append(facade_analysis_end_facade_task.si()) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py 
index f060b34390..49bbca7e88 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -128,6 +128,7 @@ def __init__(self,logger: Logger): self.multithreaded = worker_options["multithreaded"] self.create_xlsx_summary_files = worker_options["create_xlsx_summary_files"] self.facade_contributor_full_recollect = worker_options["facade_contributor_full_recollect"] + self.commit_messages = worker_options["commit_messages"] self.tool_source = "Facade" self.data_source = "Git Log" From d20c672e5b620b643f606701258789db3628d265 Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Wed, 12 Nov 2025 01:24:04 +0000 Subject: [PATCH 003/104] feat: Add Topic Modeling database schema tables Add two new tables and ORM models for Topic Modeling versioning system: 1. topic_model_meta table (Migration 35): - Stores metadata for each trained topic model - 21 fields including model_id (UUID PK), repo_id (FK), training parameters, quality metrics (coherence_score, topic_diversity), and visualization data - Enables model versioning, comparison, and intelligent retraining 2. topic_model_event table (Migration 36): - Audit log for topic modeling events - Tracks training lifecycle: started, completed, retrain triggered, etc. - Provides observability for automated and manual training operations 3. TopicModelMeta ORM model: - SQLAlchemy model definition for topic_model_meta table - Relationships and field mappings for application layer These schema changes support the Topic Modeling feature that enables: - Automated NMF-based topic extraction from repository messages - Model version management and comparison - Intelligent retraining based on data/quality changes - Storage optimization via REPLACE strategy for automatic runs Related: #3207 Signed-off-by: Xiaoha --- augur/application/db/models/__init__.py | 1 + augur/application/db/models/augur_data.py | 104 ++++++++++++++++++ .../35_create_topic_model_meta_table.py | 69 ++++++++++++ .../versions/36_add_topic_model_event.py | 63 +++++++++++ 4 files changed, 237 insertions(+) create mode 100644 augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py create mode 100644 augur/application/schema/alembic/versions/36_add_topic_model_event.py diff --git a/augur/application/db/models/__init__.py b/augur/application/db/models/__init__.py index 013f22ab42..f729f0ac1e 100644 --- a/augur/application/db/models/__init__.py +++ b/augur/application/db/models/__init__.py @@ -39,6 +39,7 @@ RepoSbomScan, RepoStat, RepoTopic, + TopicModelMeta, CommitCommentRef, CommitParent, DiscourseInsight, diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index c80077d9b6..9751cfb5f4 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -3601,3 +3601,107 @@ class RepoClone(Base): clone_data_timestamp = Column(TIMESTAMP(precision=6)) repo = relationship("Repo") + + +class TopicModelMeta(Base): + __tablename__ = "topic_model_meta" + __table_args__ = {"schema": "augur_data"} + + model_id = Column( + UUID(as_uuid=True), + primary_key=True, + server_default=text("gen_random_uuid()"), + comment="Unique identifier for the topic model" + ) + repo_id = Column( + ForeignKey("augur_data.repo.repo_id"), + comment="Repository this model was trained on" + ) + model_method = Column( + String, + nullable=False, + comment="Method used for topic modeling (e.g., 'NMF_COUNT', 'LDA_TFIDF')" + ) + num_topics = Column( + Integer, + 
nullable=False, + comment="Number of topics in the model" + ) + num_words_per_topic = Column( + Integer, + nullable=False, + comment="Number of words per topic" + ) + training_parameters = Column( + JSON, + nullable=False, + comment="JSON object containing training parameters" + ) + model_file_paths = Column( + JSON, + nullable=False, + comment="JSON object containing paths to model artifacts" + ) + parameters_hash = Column( + String, + nullable=False, + comment="Hash of parameters for deduplication" + ) + coherence_score = Column( + Float, + nullable=False, + server_default=text("0.0"), + comment="Coherence score of the model" + ) + perplexity_score = Column( + Float, + nullable=False, + server_default=text("0.0"), + comment="Perplexity score of the model" + ) + topic_diversity = Column( + Float, + nullable=False, + server_default=text("0.0"), + comment="Topic diversity score" + ) + quality = Column( + JSON, + nullable=False, + server_default=text("'{}'::jsonb"), + comment="Quality metrics" + ) + training_message_count = Column( + BigInteger, + nullable=False, + comment="Number of messages used for training" + ) + data_fingerprint = Column( + JSON, + nullable=False, + comment="Fingerprint of training data" + ) + visualization_data = Column( + JSON, + nullable=True, + comment="JSON object containing visualization data for the model" + ) + training_start_time = Column( + TIMESTAMP(), + nullable=False, + comment="When training started" + ) + training_end_time = Column( + TIMESTAMP(), + nullable=False, + comment="When training ended" + ) + tool_source = Column(String, comment="Standard Augur Metadata") + tool_version = Column(String, comment="Standard Augur Metadata") + data_source = Column(String, comment="Standard Augur Metadata") + data_collection_date = Column( + TIMESTAMP(precision=0), + server_default=text("CURRENT_TIMESTAMP") + ) + + repo = relationship("Repo") diff --git a/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py new file mode 100644 index 0000000000..9e4a00c3cc --- /dev/null +++ b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py @@ -0,0 +1,69 @@ +"""Create topic_model_meta table + +Revision ID: 35 +Revises: 34 +Create Date: 2024-08-28 20:30:00.000000 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
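# For orientation, a hedged sketch of the kind of query the versioning table
# created in upgrade() below is meant to answer (e.g. the most recent model per
# repository). This is illustrative only; PostgreSQL's DISTINCT ON is assumed
# since the migration targets the augur_data Postgres schema:
#
#   SELECT DISTINCT ON (repo_id)
#          repo_id, model_id, coherence_score, topic_diversity, training_end_time
#   FROM augur_data.topic_model_meta
#   ORDER BY repo_id, training_end_time DESC;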
+revision = '35' +down_revision = '34' +branch_labels = None +depends_on = None + + +def upgrade(): + # Create topic_model_meta table based on ER diagram with NOT NULL constraints + op.create_table('topic_model_meta', + # Primary key + sa.Column('model_id', sa.UUID(), server_default=sa.text('gen_random_uuid()'), nullable=False), + + # Foreign key to repo (nullable for multi-repo training) + sa.Column('repo_id', sa.Integer(), nullable=True), + + # Model metadata (all NOT NULL as requested) + sa.Column('model_method', sa.String(), nullable=False), + sa.Column('num_topics', sa.Integer(), nullable=False), + sa.Column('num_words_per_topic', sa.Integer(), nullable=False), + + # Parameters and configuration (NOT NULL) + sa.Column('training_parameters', postgresql.JSONB(), nullable=False), + sa.Column('model_file_paths', postgresql.JSONB(), nullable=False), + sa.Column('parameters_hash', sa.String(), nullable=False), + + # Quality metrics (NOT NULL, but can use default values) + sa.Column('coherence_score', sa.Float(), nullable=False, server_default=sa.text('0.0')), + sa.Column('perplexity_score', sa.Float(), nullable=False, server_default=sa.text('0.0')), + sa.Column('topic_diversity', sa.Float(), nullable=False, server_default=sa.text('0.0')), + sa.Column('quality', postgresql.JSONB(), nullable=False, server_default=sa.text("'{}'::jsonb")), + + # Training metadata (NOT NULL) + sa.Column('training_message_count', sa.BigInteger(), nullable=False), + sa.Column('data_fingerprint', postgresql.JSONB(), nullable=False), + + # Visualization data (optional) + sa.Column('visualization_data', postgresql.JSONB(), nullable=True), + + # Timestamps (NOT NULL with defaults) + sa.Column('training_start_time', sa.TIMESTAMP(), nullable=False), + sa.Column('training_end_time', sa.TIMESTAMP(), nullable=False), + sa.Column('data_collection_date', sa.TIMESTAMP(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + + # Standard Augur metadata (NOT NULL) + sa.Column('tool_source', sa.String(), nullable=False), + sa.Column('tool_version', sa.String(), nullable=False), + sa.Column('data_source', sa.String(), nullable=False), + + # Constraints + sa.ForeignKeyConstraint(['repo_id'], ['augur_data.repo.repo_id'], ), + sa.PrimaryKeyConstraint('model_id'), + schema='augur_data' + ) + + +def downgrade(): + op.drop_table('topic_model_meta', schema='augur_data') \ No newline at end of file diff --git a/augur/application/schema/alembic/versions/36_add_topic_model_event.py b/augur/application/schema/alembic/versions/36_add_topic_model_event.py new file mode 100644 index 0000000000..8edcd90609 --- /dev/null +++ b/augur/application/schema/alembic/versions/36_add_topic_model_event.py @@ -0,0 +1,63 @@ +""" +Create topic_model_event table for DB event logging + +Revision ID: 36 +Revises: 35 +Create Date: 2025-08-21 +""" +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
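# For orientation, a hedged sketch of the lookups the indexes created in
# upgrade() below (ix_tme_repo_ts and ix_tme_event) are meant to serve; the
# repo_id value here is only a placeholder:
#
#   SELECT event, level, ts, payload
#   FROM augur_data.topic_model_event
#   WHERE repo_id = 42
#   ORDER BY ts DESC
#   LIMIT 20;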
+revision = "36" +down_revision = "35" +branch_labels = None +depends_on = None + + +def upgrade(): + op.create_table( + "topic_model_event", + sa.Column("event_id", sa.BigInteger(), primary_key=True), + sa.Column( + "ts", + sa.TIMESTAMP(), + server_default=sa.text("CURRENT_TIMESTAMP"), + nullable=False, + ), + sa.Column("repo_id", sa.Integer(), nullable=True), + sa.Column("model_id", postgresql.UUID(as_uuid=True), nullable=True), + sa.Column("event", sa.Text(), nullable=False), + sa.Column("level", sa.Text(), server_default=sa.text("'INFO'"), nullable=False), + sa.Column("payload", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.ForeignKeyConstraint( + ["repo_id"], ["augur_data.repo.repo_id"], name="fk_tme_repo_id" + ), + sa.ForeignKeyConstraint( + ["model_id"], + ["augur_data.topic_model_meta.model_id"], + name="fk_tme_model_id", + ondelete="SET NULL", + ), + schema="augur_data", + ) + op.create_index( + "ix_tme_repo_ts", "topic_model_event", ["repo_id", "ts"], schema="augur_data" + ) + op.create_index("ix_tme_event", "topic_model_event", ["event"], schema="augur_data") + op.create_index( + "ix_tme_payload", + "topic_model_event", + [sa.text("(payload)")], + unique=False, + schema="augur_data", + postgresql_using="gin", + ) + + +def downgrade(): + op.drop_index("ix_tme_payload", table_name="topic_model_event", schema="augur_data") + op.drop_index("ix_tme_event", table_name="topic_model_event", schema="augur_data") + op.drop_index("ix_tme_repo_ts", table_name="topic_model_event", schema="augur_data") + op.drop_table("topic_model_event", schema="augur_data") From d40e9acb91657b627cede8ca7e7c56e04de6fb0b Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 12 Nov 2025 11:14:32 -0500 Subject: [PATCH 004/104] fix typos in the tests folder. 
Signed-off-by: Shlok Gilda --- Makefile | 2 +- pyproject.toml | 2 +- .../test_cli/test_add_cli_repos.py | 2 +- .../test_cli/test_cli_functionality.py | 0 .../test_config/test_config.py | 0 .../test_models/test_augur_data/test_repo.py | 2 +- .../test_augur_data/test_repo_group.py | 2 +- .../test_augur_operations/test_user.py | 2 +- .../test_augur_operations/test_user_group.py | 2 +- .../test_augur_operations/test_user_repo.py | 2 +- .../test_db/test_session.py | 0 .../test_repo_load_controller/helper.py | 0 .../test_adding_orgs.py | 2 +- .../test_adding_repos.py | 2 +- .../test_helper_functions.py | 2 +- .../test_repo_load_controller/util.py | 0 .../test_github_random_key_auth.py | 0 .../test_key_auth/test_random_key_auth.py | 0 .../test_github_api_key_handler.py | 0 .../test_paginators/test_github_paginator.py | 0 .../test_redis/test_redis_list.py | 0 .../test_util/test_worker_util.py | 0 .../test_endpoints.py | 2 +- tests/test_workers/test_set_up_fixtures.py | 6 +++--- .../bad_Data.json | 0 .../contributors.json | 0 .../contributors_un_enriched.json | 0 .../standard_enrich_cntrb_id_data.json | 0 .../test_enrich_cntrb_id.py | 2 +- .../test_enrich_data_primary_keys.py | 20 +++++++++---------- .../util_persistence.py} | 8 ++++---- 31 files changed, 30 insertions(+), 30 deletions(-) rename tests/{test_applicaton => test_application}/test_cli/test_add_cli_repos.py (98%) rename tests/{test_applicaton => test_application}/test_cli/test_cli_functionality.py (100%) rename tests/{test_applicaton => test_application}/test_config/test_config.py (100%) rename tests/{test_applicaton => test_application}/test_db/test_models/test_augur_data/test_repo.py (98%) rename tests/{test_applicaton => test_application}/test_db/test_models/test_augur_data/test_repo_group.py (96%) rename tests/{test_applicaton => test_application}/test_db/test_models/test_augur_operations/test_user.py (98%) rename tests/{test_applicaton => test_application}/test_db/test_models/test_augur_operations/test_user_group.py (99%) rename tests/{test_applicaton => test_application}/test_db/test_models/test_augur_operations/test_user_repo.py (99%) rename tests/{test_applicaton => test_application}/test_db/test_session.py (100%) rename tests/{test_applicaton => test_application}/test_repo_load_controller/helper.py (100%) rename tests/{test_applicaton => test_application}/test_repo_load_controller/test_adding_orgs.py (80%) rename tests/{test_applicaton => test_application}/test_repo_load_controller/test_adding_repos.py (99%) rename tests/{test_applicaton => test_application}/test_repo_load_controller/test_helper_functions.py (99%) rename tests/{test_applicaton => test_application}/test_repo_load_controller/util.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_key_auth/test_github_random_key_auth.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_key_auth/test_random_key_auth.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_key_handler/test_github_api_key_handler.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_paginators/test_github_paginator.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_redis/test_redis_list.py (100%) rename tests/test_tasks/{test_task_utlities => test_task_utilities}/test_util/test_worker_util.py (100%) rename tests/test_workers/{worker_persistance => worker_persistence}/bad_Data.json (100%) rename tests/test_workers/{worker_persistance => 
worker_persistence}/contributors.json (100%) rename tests/test_workers/{worker_persistance => worker_persistence}/contributors_un_enriched.json (100%) rename tests/test_workers/{worker_persistance => worker_persistence}/standard_enrich_cntrb_id_data.json (100%) rename tests/test_workers/{worker_persistance => worker_persistence}/test_enrich_cntrb_id.py (97%) rename tests/test_workers/{worker_persistance => worker_persistence}/test_enrich_data_primary_keys.py (94%) rename tests/test_workers/{worker_persistance/util_persistance.py => worker_persistence/util_persistence.py} (80%) diff --git a/Makefile b/Makefile index 4fe926edc4..c00d789faa 100644 --- a/Makefile +++ b/Makefile @@ -99,7 +99,7 @@ test-data: test: # @ pytest tests/test_tasks/test_github_tasks/ @ python3 tests/start_server.py - @ pytest tests/test_metrics/test_metrics_functionality/ tests/test_routes/test_api_functionality/ tests/test_tasks/ tests/test_applicaton/ + @ pytest tests/test_metrics/test_metrics_functionality/ tests/test_routes/test_api_functionality/ tests/test_tasks/ tests/test_application/ @ python3 tests/stop_server.py test-api: diff --git a/pyproject.toml b/pyproject.toml index ddaed4301d..529771dca0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -180,7 +180,7 @@ legacy_tox_ini = """ metric-routes: python tests/test_routes/runner.py workers: pytest tests/test_workers/ classes: pytest tests/test_classes/ - worker-persistance: pytest test/test_workers/worker_persistance/ + worker-persistence: pytest test/test_workers/worker_persistence/ [pytest] addopts = -ra -s diff --git a/tests/test_applicaton/test_cli/test_add_cli_repos.py b/tests/test_application/test_cli/test_add_cli_repos.py similarity index 98% rename from tests/test_applicaton/test_cli/test_add_cli_repos.py rename to tests/test_application/test_cli/test_add_cli_repos.py index 42f342d2bd..7905308e0a 100644 --- a/tests/test_applicaton/test_cli/test_add_cli_repos.py +++ b/tests/test_application/test_cli/test_add_cli_repos.py @@ -1,7 +1,7 @@ import pytest import logging -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.util.repo_load_controller import RepoLoadController, CLI_USER_ID diff --git a/tests/test_applicaton/test_cli/test_cli_functionality.py b/tests/test_application/test_cli/test_cli_functionality.py similarity index 100% rename from tests/test_applicaton/test_cli/test_cli_functionality.py rename to tests/test_application/test_cli/test_cli_functionality.py diff --git a/tests/test_applicaton/test_config/test_config.py b/tests/test_application/test_config/test_config.py similarity index 100% rename from tests/test_applicaton/test_config/test_config.py rename to tests/test_application/test_config/test_config.py diff --git a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py b/tests/test_application/test_db/test_models/test_augur_data/test_repo.py similarity index 98% rename from tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py rename to tests/test_application/test_db/test_models/test_augur_data/test_repo.py index dd1ef44b79..fffe3e13ff 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo.py +++ b/tests/test_application/test_db/test_models/test_augur_data/test_repo.py @@ -5,7 +5,7 @@ from augur.application.db.session import DatabaseSession from augur.tasks.github.util.github_task_session import 
GithubTaskSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import Repo logger = logging.getLogger(__name__) diff --git a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo_group.py b/tests/test_application/test_db/test_models/test_augur_data/test_repo_group.py similarity index 96% rename from tests/test_applicaton/test_db/test_models/test_augur_data/test_repo_group.py rename to tests/test_application/test_db/test_models/test_augur_data/test_repo_group.py index 4367542db8..4c144bb8e1 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_data/test_repo_group.py +++ b/tests/test_application/test_db/test_models/test_augur_data/test_repo_group.py @@ -3,7 +3,7 @@ import sqlalchemy as s from augur.application.db.session import DatabaseSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import RepoGroup logger = logging.getLogger(__name__) diff --git a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user.py b/tests/test_application/test_db/test_models/test_augur_operations/test_user.py similarity index 98% rename from tests/test_applicaton/test_db/test_models/test_augur_operations/test_user.py rename to tests/test_application/test_db/test_models/test_augur_operations/test_user.py index 6011405b26..b5c0db623a 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user.py +++ b/tests/test_application/test_db/test_models/test_augur_operations/test_user.py @@ -5,7 +5,7 @@ from augur.application.db.session import DatabaseSession from augur.tasks.github.util.github_task_session import GithubTaskSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import User diff --git a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_group.py b/tests/test_application/test_db/test_models/test_augur_operations/test_user_group.py similarity index 99% rename from tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_group.py rename to tests/test_application/test_db/test_models/test_augur_operations/test_user_group.py index 70fa11ecb0..1bcac18aed 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_group.py +++ b/tests/test_application/test_db/test_models/test_augur_operations/test_user_group.py @@ -3,7 +3,7 @@ import sqlalchemy as s from augur.application.db.session import DatabaseSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import UserGroup logger = logging.getLogger(__name__) diff --git a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py b/tests/test_application/test_db/test_models/test_augur_operations/test_user_repo.py similarity index 99% rename from tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py rename to tests/test_application/test_db/test_models/test_augur_operations/test_user_repo.py index 4b288cbabb..da97fb4344 100644 --- a/tests/test_applicaton/test_db/test_models/test_augur_operations/test_user_repo.py +++ 
b/tests/test_application/test_db/test_models/test_augur_operations/test_user_repo.py @@ -5,7 +5,7 @@ from augur.application.db.session import DatabaseSession from augur.tasks.github.util.github_task_session import GithubTaskSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import UserRepo logger = logging.getLogger(__name__) diff --git a/tests/test_applicaton/test_db/test_session.py b/tests/test_application/test_db/test_session.py similarity index 100% rename from tests/test_applicaton/test_db/test_session.py rename to tests/test_application/test_db/test_session.py diff --git a/tests/test_applicaton/test_repo_load_controller/helper.py b/tests/test_application/test_repo_load_controller/helper.py similarity index 100% rename from tests/test_applicaton/test_repo_load_controller/helper.py rename to tests/test_application/test_repo_load_controller/helper.py diff --git a/tests/test_applicaton/test_repo_load_controller/test_adding_orgs.py b/tests/test_application/test_repo_load_controller/test_adding_orgs.py similarity index 80% rename from tests/test_applicaton/test_repo_load_controller/test_adding_orgs.py rename to tests/test_application/test_repo_load_controller/test_adding_orgs.py index 1b8effe568..f053959628 100644 --- a/tests/test_applicaton/test_repo_load_controller/test_adding_orgs.py +++ b/tests/test_application/test_repo_load_controller/test_adding_orgs.py @@ -1,7 +1,7 @@ import pytest import logging -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.util.repo_load_controller import RepoLoadController, DEFAULT_REPO_GROUP_IDS, CLI_USER_ID diff --git a/tests/test_applicaton/test_repo_load_controller/test_adding_repos.py b/tests/test_application/test_repo_load_controller/test_adding_repos.py similarity index 99% rename from tests/test_applicaton/test_repo_load_controller/test_adding_repos.py rename to tests/test_application/test_repo_load_controller/test_adding_repos.py index 7f65b1e017..366874455e 100644 --- a/tests/test_applicaton/test_repo_load_controller/test_adding_repos.py +++ b/tests/test_application/test_repo_load_controller/test_adding_repos.py @@ -1,7 +1,7 @@ import pytest import logging -from tests.test_applicaton.test_repo_load_controller.helper import * +from tests.test_application.test_repo_load_controller.helper import * from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.util.repo_load_controller import RepoLoadController, DEFAULT_REPO_GROUP_IDS, CLI_USER_ID diff --git a/tests/test_applicaton/test_repo_load_controller/test_helper_functions.py b/tests/test_application/test_repo_load_controller/test_helper_functions.py similarity index 99% rename from tests/test_applicaton/test_repo_load_controller/test_helper_functions.py rename to tests/test_application/test_repo_load_controller/test_helper_functions.py index ab9222a920..a9dbd65763 100644 --- a/tests/test_applicaton/test_repo_load_controller/test_helper_functions.py +++ b/tests/test_application/test_repo_load_controller/test_helper_functions.py @@ -5,7 +5,7 @@ from augur.application.db.session import DatabaseSession from augur.tasks.github.util.github_task_session import GithubTaskSession -from tests.test_applicaton.test_repo_load_controller.helper import * +from 
tests.test_application.test_repo_load_controller.helper import * from augur.application.db.models import Repo, RepoGroup, UserRepo, UserGroup logger = logging.getLogger(__name__) diff --git a/tests/test_applicaton/test_repo_load_controller/util.py b/tests/test_application/test_repo_load_controller/util.py similarity index 100% rename from tests/test_applicaton/test_repo_load_controller/util.py rename to tests/test_application/test_repo_load_controller/util.py diff --git a/tests/test_tasks/test_task_utlities/test_key_auth/test_github_random_key_auth.py b/tests/test_tasks/test_task_utilities/test_key_auth/test_github_random_key_auth.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_key_auth/test_github_random_key_auth.py rename to tests/test_tasks/test_task_utilities/test_key_auth/test_github_random_key_auth.py diff --git a/tests/test_tasks/test_task_utlities/test_key_auth/test_random_key_auth.py b/tests/test_tasks/test_task_utilities/test_key_auth/test_random_key_auth.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_key_auth/test_random_key_auth.py rename to tests/test_tasks/test_task_utilities/test_key_auth/test_random_key_auth.py diff --git a/tests/test_tasks/test_task_utlities/test_key_handler/test_github_api_key_handler.py b/tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_key_handler/test_github_api_key_handler.py rename to tests/test_tasks/test_task_utilities/test_key_handler/test_github_api_key_handler.py diff --git a/tests/test_tasks/test_task_utlities/test_paginators/test_github_paginator.py b/tests/test_tasks/test_task_utilities/test_paginators/test_github_paginator.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_paginators/test_github_paginator.py rename to tests/test_tasks/test_task_utilities/test_paginators/test_github_paginator.py diff --git a/tests/test_tasks/test_task_utlities/test_redis/test_redis_list.py b/tests/test_tasks/test_task_utilities/test_redis/test_redis_list.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_redis/test_redis_list.py rename to tests/test_tasks/test_task_utilities/test_redis/test_redis_list.py diff --git a/tests/test_tasks/test_task_utlities/test_util/test_worker_util.py b/tests/test_tasks/test_task_utilities/test_util/test_worker_util.py similarity index 100% rename from tests/test_tasks/test_task_utlities/test_util/test_worker_util.py rename to tests/test_tasks/test_task_utilities/test_util/test_worker_util.py diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py b/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py index c27ebf4ed3..0b3cda6621 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_endpoints.py @@ -1,5 +1,5 @@ #SPDX-License-Identifier: MIT -from tests.test_workers.worker_persistance.util_persistance import * +from tests.test_workers.worker_persistence.util_persistence import * import pandas as pd #from augur.cli import add_repos #from augur.cli import add_repo_groups diff --git a/tests/test_workers/test_set_up_fixtures.py b/tests/test_workers/test_set_up_fixtures.py index 3add1f83fb..4109bcda55 100644 --- a/tests/test_workers/test_set_up_fixtures.py +++ b/tests/test_workers/test_set_up_fixtures.py @@ -100,7 +100,7 @@ def 
database_connection(): # Define a dummy worker class that gets the methods we need without running super().__init__ -class DummyPersistance(Persistant): +class DummyPersistence(Persistent): def __init__(self, database_connection): self.db = database_connection self.logger = logging.getLogger() @@ -127,7 +127,7 @@ def __init__(self, database_connection, config={}): self.platform = "github" # first set up logging. - self._root_augur_dir = Persistant.ROOT_AUGUR_DIR + self._root_augur_dir = Persistent.ROOT_AUGUR_DIR self.augur_config = AugurConfig(self._root_augur_dir) # Get default logging settings @@ -146,7 +146,7 @@ def __init__(self, database_connection, config={}): self.tool_version = '\'1.0.1\'' self.data_source = '\'Worker test Data\'' - # This mirros the functionality of the definition found in worker_persistance to make + # This mirrors the functionality of the definition found in worker_persistence to make # github related function calls much much easier to test. def initialize_database_connections(self): DB_STR = 'postgresql://{}:{}@{}:{}/{}'.format( diff --git a/tests/test_workers/worker_persistance/bad_Data.json b/tests/test_workers/worker_persistence/bad_Data.json similarity index 100% rename from tests/test_workers/worker_persistance/bad_Data.json rename to tests/test_workers/worker_persistence/bad_Data.json diff --git a/tests/test_workers/worker_persistance/contributors.json b/tests/test_workers/worker_persistence/contributors.json similarity index 100% rename from tests/test_workers/worker_persistance/contributors.json rename to tests/test_workers/worker_persistence/contributors.json diff --git a/tests/test_workers/worker_persistance/contributors_un_enriched.json b/tests/test_workers/worker_persistence/contributors_un_enriched.json similarity index 100% rename from tests/test_workers/worker_persistance/contributors_un_enriched.json rename to tests/test_workers/worker_persistence/contributors_un_enriched.json diff --git a/tests/test_workers/worker_persistance/standard_enrich_cntrb_id_data.json b/tests/test_workers/worker_persistence/standard_enrich_cntrb_id_data.json similarity index 100% rename from tests/test_workers/worker_persistance/standard_enrich_cntrb_id_data.json rename to tests/test_workers/worker_persistence/standard_enrich_cntrb_id_data.json diff --git a/tests/test_workers/worker_persistance/test_enrich_cntrb_id.py b/tests/test_workers/worker_persistence/test_enrich_cntrb_id.py similarity index 97% rename from tests/test_workers/worker_persistance/test_enrich_cntrb_id.py rename to tests/test_workers/worker_persistence/test_enrich_cntrb_id.py index f7fd67bb5b..4dbcb4f91b 100644 --- a/tests/test_workers/worker_persistance/test_enrich_cntrb_id.py +++ b/tests/test_workers/worker_persistence/test_enrich_cntrb_id.py @@ -1,5 +1,5 @@ #SPDX-License-Identifier: MIT -from tests.test_workers.worker_persistance.util_persistance import * +from tests.test_workers.worker_persistence.util_persistence import * #WIP diff --git a/tests/test_workers/worker_persistance/test_enrich_data_primary_keys.py b/tests/test_workers/worker_persistence/test_enrich_data_primary_keys.py similarity index 94% rename from tests/test_workers/worker_persistance/test_enrich_data_primary_keys.py rename to tests/test_workers/worker_persistence/test_enrich_data_primary_keys.py index a13f5a639a..df7b98bca5 100644 --- a/tests/test_workers/worker_persistance/test_enrich_data_primary_keys.py +++ b/tests/test_workers/worker_persistence/test_enrich_data_primary_keys.py @@ -1,6 +1,6 @@ #SPDX-License-Identifier: MIT 
-from tests.test_workers.worker_persistance.util_persistance import * +from tests.test_workers.worker_persistence.util_persistence import * @@ -62,12 +62,12 @@ def test_enrich_data_primary_keys_standard_input(database_connection, sample_sou database_connection.execute(tableDict['contributors_table'].insert().values(cntrb)) #create class for enrichment - dummyPersistant = DummyPersistance(database_connection) + dummyPersistent = DummyPersistence(database_connection) gh_merge_fields = ['avatar_url'] augur_merge_fields = ['gh_avatar_url'] - dummyPersistant.enrich_data_primary_keys(sample_source_data_enriched, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) + dummyPersistent.enrich_data_primary_keys(sample_source_data_enriched, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) #now test each record to make sure that they have an avatar_url avatar_url_sql = s.sql.text(""" @@ -87,11 +87,11 @@ def test_enrich_data_primary_keys_bad_data(database_connection): augur_merge_fields = ['gh_avatar_url'] #create class for enrichment - dummyPersistant = DummyPersistance(database_connection) + dummyPersistent = DummyPersistence(database_connection) #Make sure that function rejects null data - assert dummyPersistant.enrich_data_primary_keys({}, "contributors_table", gh_merge_fields, augur_merge_fields) == {} - assert dummyPersistant.enrich_data_primary_keys(None, "contributors_table", gh_merge_fields, augur_merge_fields) == None + assert dummyPersistent.enrich_data_primary_keys({}, "contributors_table", gh_merge_fields, augur_merge_fields) == {} + assert dummyPersistent.enrich_data_primary_keys(None, "contributors_table", gh_merge_fields, augur_merge_fields) == None def test_enrich_data_primary_keys_redundant_enrich(database_connection,sample_source_data_enriched, sample_source_data_unenriched): @@ -151,12 +151,12 @@ def test_enrich_data_primary_keys_redundant_enrich(database_connection,sample_so database_connection.execute(tableDict['contributors_table'].insert().values(cntrb)) #create class for enrichment - dummyPersistant = DummyPersistance(database_connection) + dummyPersistent = DummyPersistence(database_connection) gh_merge_fields = ['avatar_url'] augur_merge_fields = ['gh_avatar_url'] - dummyPersistant.enrich_data_primary_keys(sample_source_data_enriched, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) + dummyPersistent.enrich_data_primary_keys(sample_source_data_enriched, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) #now test each record to make sure that they have an avatar_url avatar_url_sql = s.sql.text(""" @@ -229,11 +229,11 @@ def test_enrich_data_primary_keys_standard_input(database_connection, sample_sou database_connection.execute(tableDict['contributors_table'].insert().values(cntrb)) #create class for enrichment - dummyPersistant = DummyPersistance(database_connection) + dummyPersistent = DummyPersistence(database_connection) gh_merge_fields = ['avatar_url'] augur_merge_fields = ['gh_avatar_url'] - dummyPersistant.enrich_data_primary_keys(sample_source_data_bad_api_return, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) + dummyPersistent.enrich_data_primary_keys(sample_source_data_bad_api_return, tableDict['contributors_table'], gh_merge_fields, augur_merge_fields) return \ No newline at end of file diff --git a/tests/test_workers/worker_persistance/util_persistance.py b/tests/test_workers/worker_persistence/util_persistence.py similarity index 80% rename from 
tests/test_workers/worker_persistance/util_persistance.py rename to tests/test_workers/worker_persistence/util_persistence.py index 118d652b7e..e1a9a6e982 100644 --- a/tests/test_workers/worker_persistance/util_persistance.py +++ b/tests/test_workers/worker_persistence/util_persistence.py @@ -8,7 +8,7 @@ #Sample source data generation that pulls json data that has contributions listed @pytest.fixture def sample_source_data_enriched(): - jsonFile = open("tests/test_workers/worker_persistance/contributors.json") + jsonFile = open("tests/test_workers/worker_persistence/contributors.json") source_data = json.load(jsonFile) @@ -18,7 +18,7 @@ def sample_source_data_enriched(): #Sample source data generation that opens json data that doesn't have contributions listed @pytest.fixture def sample_source_data_unenriched(): - jsonFile = open("tests/test_workers/worker_persistance/contributors_un_enriched.json") + jsonFile = open("tests/test_workers/worker_persistence/contributors_un_enriched.json") source_data = json.load(jsonFile) @@ -28,7 +28,7 @@ def sample_source_data_unenriched(): #Bad data that an api might return @pytest.fixture def sample_source_data_bad_api_return(): - jsonFile = open("tests/test_workers/worker_persistance/bad_Data.json") + jsonFile = open("tests/test_workers/worker_persistence/bad_Data.json") source_data = json.load(jsonFile) @@ -39,7 +39,7 @@ def sample_source_data_bad_api_return(): #Sample data for comments api return @pytest.fixture def sample_source_data_standard_github_comments(): - jsonFile = open("tests/test_workers/worker_persistance/standard_enrich_cntrb_id_data.json") + jsonFile = open("tests/test_workers/worker_persistence/standard_enrich_cntrb_id_data.json") source_data = json.load(jsonFile) From 6702c35cfbbf88b03b300e66723a3e5a322f4a9c Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Wed, 12 Nov 2025 17:03:52 +0000 Subject: [PATCH 005/104] refactor: Remove payload index to match Augur conventions - All JSON/JSONB fields in Augur have NO indexes - Verified: repo_badging.data (JSONB), chaoss_metric_status.cm_info (JSON), etc. 
- payload is used for display, not filtering - Query performance relies on ix_tme_repo_ts and ix_tme_event indexes Signed-off-by: Xiaoha --- .../alembic/versions/36_add_topic_model_event.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/augur/application/schema/alembic/versions/36_add_topic_model_event.py b/augur/application/schema/alembic/versions/36_add_topic_model_event.py index 8edcd90609..a2cb78ffd3 100644 --- a/augur/application/schema/alembic/versions/36_add_topic_model_event.py +++ b/augur/application/schema/alembic/versions/36_add_topic_model_event.py @@ -46,13 +46,11 @@ def upgrade(): "ix_tme_repo_ts", "topic_model_event", ["repo_id", "ts"], schema="augur_data" ) op.create_index("ix_tme_event", "topic_model_event", ["event"], schema="augur_data") + # btree index on payload for exact match queries (following Augur conventions) + # Note: btree only supports equality comparison, not JSON containment queries op.create_index( - "ix_tme_payload", - "topic_model_event", - [sa.text("(payload)")], - unique=False, - schema="augur_data", - postgresql_using="gin", + "ix_tme_payload", "topic_model_event", ["payload"], + unique=False, schema="augur_data" ) From a96e62ff7ffebf4b74cd4987344807234e770ec4 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 12 Nov 2025 13:08:06 -0500 Subject: [PATCH 006/104] fix incorrect path for worker persistence in pytest configuration Signed-off-by: Shlok Gilda --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 529771dca0..801ac54574 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -180,7 +180,7 @@ legacy_tox_ini = """ metric-routes: python tests/test_routes/runner.py workers: pytest tests/test_workers/ classes: pytest tests/test_classes/ - worker-persistence: pytest test/test_workers/worker_persistence/ + worker-persistence: pytest tests/test_workers/worker_persistence/ [pytest] addopts = -ra -s From c64246264928be71fd90014dbb879d9963956c64 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 10 Nov 2025 17:09:00 -0500 Subject: [PATCH 007/104] Detect docker environments and ensure gunicorn error logs end up in dockers log stream Signed-off-by: Adrian Edwards --- augur/api/gunicorn_conf.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/augur/api/gunicorn_conf.py b/augur/api/gunicorn_conf.py index 4e97d7b465..dd1bfc6961 100644 --- a/augur/api/gunicorn_conf.py +++ b/augur/api/gunicorn_conf.py @@ -39,9 +39,15 @@ # set the log location for gunicorn logs_directory = get_value('Logging', 'logs_directory') + +is_docker = os.getenv("AUGUR_DOCKER_DEPLOY").lower() in ('true', '1', 't', 'y', 'yes') accesslog = f"{logs_directory}/gunicorn.log" errorlog = f"{logs_directory}/gunicorn.log" +# If deploying via docker, include gunicorn error logs in the docker log stream by sending it to stdout +if is_docker: + errorlog = '-' + ssl_bool = get_value('Server', 'ssl') if ssl_bool is True: From c952f662abe24c742e22b41f35a614d56f41f017 Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Wed, 12 Nov 2025 20:25:12 +0000 Subject: [PATCH 008/104] fix: Use timezone-aware timestamps for topic modeling schema - set training_start_time/end_time/data_collection_date to TIMESTAMPTZ - update TopicModelMeta ORM to use timezone-aware columns - align topic_model_event ts column with TIMESTAMPTZ requirement - satisfies maintainer request for timezone data storage Signed-off-by: Xiaoha --- augur/application/db/models/augur_data.py | 6 +++--- 
.../alembic/versions/35_create_topic_model_meta_table.py | 6 +++--- .../schema/alembic/versions/36_add_topic_model_event.py | 9 +-------- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 9751cfb5f4..ddf11e0532 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -3687,12 +3687,12 @@ class TopicModelMeta(Base): comment="JSON object containing visualization data for the model" ) training_start_time = Column( - TIMESTAMP(), + TIMESTAMP(timezone=True), nullable=False, comment="When training started" ) training_end_time = Column( - TIMESTAMP(), + TIMESTAMP(timezone=True), nullable=False, comment="When training ended" ) @@ -3700,7 +3700,7 @@ class TopicModelMeta(Base): tool_version = Column(String, comment="Standard Augur Metadata") data_source = Column(String, comment="Standard Augur Metadata") data_collection_date = Column( - TIMESTAMP(precision=0), + TIMESTAMP(timezone=True, precision=0), server_default=text("CURRENT_TIMESTAMP") ) diff --git a/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py index 9e4a00c3cc..b1235365ba 100644 --- a/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py +++ b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py @@ -49,9 +49,9 @@ def upgrade(): sa.Column('visualization_data', postgresql.JSONB(), nullable=True), # Timestamps (NOT NULL with defaults) - sa.Column('training_start_time', sa.TIMESTAMP(), nullable=False), - sa.Column('training_end_time', sa.TIMESTAMP(), nullable=False), - sa.Column('data_collection_date', sa.TIMESTAMP(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('training_start_time', postgresql.TIMESTAMP(timezone=True), nullable=False), + sa.Column('training_end_time', postgresql.TIMESTAMP(timezone=True), nullable=False), + sa.Column('data_collection_date', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), # Standard Augur metadata (NOT NULL) sa.Column('tool_source', sa.String(), nullable=False), diff --git a/augur/application/schema/alembic/versions/36_add_topic_model_event.py b/augur/application/schema/alembic/versions/36_add_topic_model_event.py index a2cb78ffd3..cfc7e5e813 100644 --- a/augur/application/schema/alembic/versions/36_add_topic_model_event.py +++ b/augur/application/schema/alembic/versions/36_add_topic_model_event.py @@ -22,7 +22,7 @@ def upgrade(): sa.Column("event_id", sa.BigInteger(), primary_key=True), sa.Column( "ts", - sa.TIMESTAMP(), + postgresql.TIMESTAMP(timezone=True), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False, ), @@ -46,16 +46,9 @@ def upgrade(): "ix_tme_repo_ts", "topic_model_event", ["repo_id", "ts"], schema="augur_data" ) op.create_index("ix_tme_event", "topic_model_event", ["event"], schema="augur_data") - # btree index on payload for exact match queries (following Augur conventions) - # Note: btree only supports equality comparison, not JSON containment queries - op.create_index( - "ix_tme_payload", "topic_model_event", ["payload"], - unique=False, schema="augur_data" - ) def downgrade(): - op.drop_index("ix_tme_payload", table_name="topic_model_event", schema="augur_data") op.drop_index("ix_tme_event", table_name="topic_model_event", schema="augur_data") op.drop_index("ix_tme_repo_ts", 
table_name="topic_model_event", schema="augur_data") op.drop_table("topic_model_event", schema="augur_data") From aa67f9b42b495fca3e08deb6a5b345208f080a42 Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Wed, 12 Nov 2025 23:42:34 +0000 Subject: [PATCH 009/104] chore: rely on SQLAlchemy TIMESTAMP type with timezone - switch Alembic migrations to use sa.TIMESTAMP(timezone=True) - keeps timezone support while avoiding Postgres-specific type import Signed-off-by: Xiaoha --- .../alembic/versions/35_create_topic_model_meta_table.py | 6 +++--- .../schema/alembic/versions/36_add_topic_model_event.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py index b1235365ba..042155556f 100644 --- a/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py +++ b/augur/application/schema/alembic/versions/35_create_topic_model_meta_table.py @@ -49,9 +49,9 @@ def upgrade(): sa.Column('visualization_data', postgresql.JSONB(), nullable=True), # Timestamps (NOT NULL with defaults) - sa.Column('training_start_time', postgresql.TIMESTAMP(timezone=True), nullable=False), - sa.Column('training_end_time', postgresql.TIMESTAMP(timezone=True), nullable=False), - sa.Column('data_collection_date', postgresql.TIMESTAMP(timezone=True), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), + sa.Column('training_start_time', sa.TIMESTAMP(timezone=True), nullable=False), + sa.Column('training_end_time', sa.TIMESTAMP(timezone=True), nullable=False), + sa.Column('data_collection_date', sa.TIMESTAMP(timezone=True), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=False), # Standard Augur metadata (NOT NULL) sa.Column('tool_source', sa.String(), nullable=False), diff --git a/augur/application/schema/alembic/versions/36_add_topic_model_event.py b/augur/application/schema/alembic/versions/36_add_topic_model_event.py index cfc7e5e813..5359e8f10d 100644 --- a/augur/application/schema/alembic/versions/36_add_topic_model_event.py +++ b/augur/application/schema/alembic/versions/36_add_topic_model_event.py @@ -22,7 +22,7 @@ def upgrade(): sa.Column("event_id", sa.BigInteger(), primary_key=True), sa.Column( "ts", - postgresql.TIMESTAMP(timezone=True), + sa.TIMESTAMP(timezone=True), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False, ), From 46e5b69fa40396fd4258acaff7256b3a83d52f8a Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Tue, 4 Nov 2025 21:56:20 -0500 Subject: [PATCH 010/104] add flexible column order support for CSV imports - add csv_utils.py with intelligent header detection - refactor add-repos and add-repo-groups commands to use new CSV parser - support both header and headerless CSV formats - add automatic column detection for headerless CSVs - add 10MB file size limit with clear error message - update sample CSV files to include headers Fixes #3310 Signed-off-by: Shlok Gilda --- augur/application/cli/csv_utils.py | 272 ++++++++++++++++++ augur/application/cli/db.py | 168 ++++++++--- .../schema/repo_group_load_sample.csv | 1 + augur/application/schema/repo_load_sample.csv | 1 + .../test_repo_groups.csv | 1 + .../test_repos.csv | 1 + 6 files changed, 399 insertions(+), 45 deletions(-) create mode 100644 augur/application/cli/csv_utils.py diff --git a/augur/application/cli/csv_utils.py b/augur/application/cli/csv_utils.py new file mode 100644 index 0000000000..2c0a2fcff3 --- /dev/null +++ 
b/augur/application/cli/csv_utils.py @@ -0,0 +1,272 @@ +# SPDX-License-Identifier: MIT +""" +CSV processing utilities for Augur CLI +""" +import csv +import logging +import os +from typing import Dict, List, Tuple + +logger = logging.getLogger(__name__) + +# Constants +MAX_FILE_SIZE_MB = 10 +MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024 + + +class CSVProcessingError(Exception): + """Raised when CSV processing fails.""" + + pass + + +def check_file_size(filename: str) -> None: + """Validate file size is under limit""" + size = os.path.getsize(filename) + if size > MAX_FILE_SIZE_BYTES: + size_mb = size / (1024 * 1024) + raise CSVProcessingError( + f"File size ({size_mb:.1f}MB) exceeds {MAX_FILE_SIZE_MB}MB limit. " + f"Consider splitting into smaller batches." + ) + + +def detect_headers(first_row: List[str], expected_columns: set) -> bool: + """Detect if first row contains column headers""" + normalized = {col.strip().lower() for col in first_row} + return expected_columns.issubset(normalized) + + +def detect_column_mapping_repos(rows: List[List[str]]) -> Dict[str, int]: + """Detect which column contains URLs vs IDs for headerless repo CSVs""" + from augur.application.db.models import Repo + + if not rows or len(rows[0]) != 2: + raise CSVProcessingError( + "Expected 2 columns (repo_url, repo_group_id). " + f"Found {len(rows[0]) if rows else 0} columns." + ) + + # Sample first 10 rows to determine column types + sample_size = min(10, len(rows)) + sample_rows = rows[:sample_size] + + # Test each column to see if it contains URLs + for col_idx in [0, 1]: + col_values = [row[col_idx] for row in sample_rows] + + # Count how many values in this column parse as valid git URLs + url_matches = 0 + for value in col_values: + value = value.strip() + github_parse = Repo.parse_github_repo_url(value) + gitlab_parse = Repo.parse_gitlab_repo_url(value) + + if github_parse != (None, None) or gitlab_parse != (None, None): + url_matches += 1 + + # If >80% of values are valid URLs, this is the URL column + match_rate = url_matches / len(col_values) + if match_rate >= 0.8: + url_col = col_idx + id_col = 1 - col_idx # The other column + return {"repo_url": url_col, "repo_group_id": id_col} + + raise CSVProcessingError( + "Could not detect column types. Ensure CSV contains valid git repository URLs. " + "Or add headers: repo_url,repo_group_id" + ) + + +def detect_column_mapping_repo_groups(rows: List[List[str]]) -> Dict[str, int]: + """Detect which column contains IDs vs names for headerless repo group CSVs""" + if not rows or len(rows[0]) != 2: + raise CSVProcessingError( + "Expected 2 columns (repo_group_id, repo_group_name). " + f"Found {len(rows[0]) if rows else 0} columns." + ) + + # Sample first 10 rows + sample_size = min(10, len(rows)) + sample_rows = rows[:sample_size] + + # Test each column to see if it contains integers + for col_idx in [0, 1]: + col_values = [row[col_idx] for row in sample_rows] + + # Count how many values are positive integers + int_matches = 0 + for value in col_values: + try: + if int(value.strip()) > 0: + int_matches += 1 + except (ValueError, AttributeError): + pass + + # If >80% of values are integers, this is the ID column + match_rate = int_matches / len(col_values) + if match_rate >= 0.8: + id_col = col_idx + name_col = 1 - col_idx # The other column + return {"repo_group_id": id_col, "repo_group_name": name_col} + + raise CSVProcessingError( + "Could not detect column types. Ensure CSV has valid format. 
" + "Or add headers: repo_group_id,repo_group_name" + ) + + +def process_repo_csv(filename: str) -> List[Dict[str, str]]: + """Process repository CSV file with intelligent header detection""" + check_file_size(filename) + + rows = [] + + with open(filename, "r", newline="") as f: + # Read first line to detect headers + first_line = f.readline() + f.seek(0) + + first_row = next(csv.reader([first_line])) + has_headers = detect_headers(first_row, {"repo_url", "repo_group_id"}) + + if has_headers: + logger.info("CSV has headers, using DictReader") + reader = csv.DictReader(f) + + # Normalize fieldnames + reader.fieldnames = [fn.strip().lower() for fn in reader.fieldnames] + + # Validate required columns present + required = {"repo_url", "repo_group_id"} + if not required.issubset(set(reader.fieldnames)): + missing = required - set(reader.fieldnames) + raise CSVProcessingError( + f"Missing required columns: {missing}. " + f"Expected: repo_url, repo_group_id" + ) + + for line_num, row in enumerate(reader, start=2): + row_normalized = {k.strip().lower(): v.strip() for k, v in row.items()} + rows.append(row_normalized) + + else: + logger.info("CSV has no headers, using intelligent column detection") + # Read all rows + all_rows = list(csv.reader(f)) + + if not all_rows: + raise CSVProcessingError("CSV file is empty") + + # Detect which column is which + col_mapping = detect_column_mapping_repos(all_rows) + + # Convert to dicts + for line_num, row in enumerate(all_rows, start=1): + if len(row) != 2: + logger.warning( + f"Line {line_num}: Expected 2 columns, got {len(row)}, skipping" + ) + continue + + row_dict = { + "repo_url": row[col_mapping["repo_url"]].strip(), + "repo_group_id": row[col_mapping["repo_group_id"]].strip(), + } + rows.append(row_dict) + + logger.info(f"Parsed {len(rows)} rows from CSV") + return rows + + +def process_repo_group_csv(filename: str) -> List[Dict[str, str]]: + """Process repository group CSV file with intelligent header detection""" + check_file_size(filename) + + rows = [] + + with open(filename, "r", newline="") as f: + # Read first line to detect headers + first_line = f.readline() + f.seek(0) + + first_row = next(csv.reader([first_line])) + has_headers = detect_headers(first_row, {"repo_group_id", "repo_group_name"}) + + if has_headers: + logger.info("CSV has headers, using DictReader") + reader = csv.DictReader(f) + + # Normalize fieldnames + reader.fieldnames = [fn.strip().lower() for fn in reader.fieldnames] + + # Validate required columns present + required = {"repo_group_id", "repo_group_name"} + if not required.issubset(set(reader.fieldnames)): + missing = required - set(reader.fieldnames) + raise CSVProcessingError( + f"Missing required columns: {missing}. 
" + f"Expected: repo_group_id, repo_group_name" + ) + + for line_num, row in enumerate(reader, start=2): + row_normalized = {k.strip().lower(): v.strip() for k, v in row.items()} + + # Skip empty rows + if not row_normalized.get("repo_group_id") or not row_normalized.get( + "repo_group_name" + ): + continue + + rows.append(row_normalized) + + else: + logger.info("CSV has no headers, using intelligent column detection") + # Read all rows + all_rows = list(csv.reader(f)) + + if not all_rows: + raise CSVProcessingError("CSV file is empty") + + # Detect which column is which + col_mapping = detect_column_mapping_repo_groups(all_rows) + + # Convert to dicts + for line_num, row in enumerate(all_rows, start=1): + if len(row) != 2: + logger.warning( + f"Line {line_num}: Expected 2 columns, got {len(row)}, skipping" + ) + continue + + # Skip empty rows + if not row[0].strip() or not row[1].strip(): + continue + + row_dict = { + "repo_group_id": row[col_mapping["repo_group_id"]].strip(), + "repo_group_name": row[col_mapping["repo_group_name"]].strip(), + } + rows.append(row_dict) + + logger.info(f"Parsed {len(rows)} rows from CSV") + return rows + + +def write_rejection_file(filename: str, rejections: List[Tuple[Dict, str]]) -> str: + """Write rejected rows to a .rejected.csv file""" + if not rejections: + return None + + rejection_file = f"{filename}.rejected.csv" + + with open(rejection_file, "w", newline="") as f: + writer = csv.writer(f) + writer.writerow(["original_data", "rejection_reason"]) + + for row_dict, reason in rejections: + original_data = ",".join(str(v) for v in row_dict.values()) + writer.writerow([original_data, reason]) + + logger.info(f"Wrote {len(rejections)} rejections to {rejection_file}") + return rejection_file diff --git a/augur/application/cli/db.py b/augur/application/cli/db.py index c20fcf0b2e..20fec42412 100644 --- a/augur/application/cli/db.py +++ b/augur/application/cli/db.py @@ -1,12 +1,11 @@ # SPDX-License-Identifier: MIT import os -from os import environ, chmod, path, getenv, stat +from os import environ, chmod, path, getenv import logging from sys import exit from subprocess import call import random import string -import csv import click import sqlalchemy as s import pandas as pd @@ -25,6 +24,12 @@ from sqlalchemy import update from datetime import datetime from augur.application.db.models import Repo +from augur.application.cli.csv_utils import ( + process_repo_csv, + process_repo_group_csv, + write_rejection_file, + CSVProcessingError, +) logger = logging.getLogger(__name__) @@ -42,9 +47,14 @@ def cli(ctx): @with_database @click.pass_context def add_repos(ctx, filename): - """Add repositories to Augur's database. + """Add repositories to Augur's database from a CSV file. + + The CSV file can have headers (recommended): + repo_url,repo_group_id + https://github.com/chaoss/augur.git,10 - The .csv file format should be repo_url,group_id + Or no headers (backward compatible - column order will be auto-detected): + https://github.com/chaoss/augur.git,10 NOTE: The Group ID must already exist in the REPO_Groups Table. 
@@ -55,30 +65,59 @@ def add_repos(ctx, filename): with GithubTaskSession(logger, engine=ctx.obj.engine) as session: controller = RepoLoadController(session) - line_total = len(open(filename).readlines()) - with open(filename) as upload_repos_file: - data = csv.reader(upload_repos_file, delimiter=",") - for line_num, row in enumerate(data): - repo_data = {} - repo_data["url"] = row[0] + try: + # Parse CSV (handles headers and column detection) + rows = process_repo_csv(filename) + + if not rows: + logger.error("No valid rows found in CSV file") + return + + logger.info(f"Processing {len(rows)} repositories...") + + # Process each row using EXISTING logic + successful = 0 + rejections = [] + + for row in rows: try: - repo_data["repo_group_id"] = int(row[1]) - except ValueError: - print( - f"Invalid repo group_id: {row[1]} for Git url: `{repo_data['url']}`" - ) + repo_data = { + "url": row["repo_url"], + "repo_group_id": int(row["repo_group_id"]), + } + except (ValueError, KeyError) as e: + logger.warning(f"Invalid data format: {row}, error: {e}") + rejections.append((row, f"Invalid format: {e}")) continue print( - f"Inserting repo {line_num}/{line_total} with Git URL `{repo_data['url']}` into repo group {repo_data['repo_group_id']}" + f"Inserting repo with Git URL `{repo_data['url']}` into repo group {repo_data['repo_group_id']}" ) succeeded, message = controller.add_cli_repo(repo_data) - if not succeeded: - logger.error(f"insert repo failed with error: {message['status']}`") - else: + if succeeded: + successful += 1 logger.info(f"Repo added: {repo_data}") print("Success") + else: + logger.error(f"insert repo failed with error: {message['status']}") + rejections.append((row, f"Failed to add repo: {message['status']}")) + + logger.info(f"Successfully added {successful} repositories") + + if rejections: + rejection_file = write_rejection_file(filename, rejections) + logger.warning( + f"{len(rejections)} repositories failed. " + f"See {rejection_file} for details." 
+ ) + + except CSVProcessingError as e: + logger.error(f"CSV processing error: {e}") + return + except Exception as e: + logger.error(f"Unexpected error: {e}") + raise @cli.command("get-repo-groups") @@ -113,40 +152,79 @@ def add_repo_groups(ctx, filename): """ Create new repo groups in Augur's database """ - with ctx.obj.engine.begin() as connection: - df = pd.read_sql( - s.sql.text("SELECT repo_group_id FROM augur_data.repo_groups"), - connection, - ) - repo_group_IDs = df["repo_group_id"].values.tolist() - - insert_repo_group_sql = s.sql.text( + try: + # Parse CSV (handles headers and column detection) + rows = process_repo_group_csv(filename) + + if not rows: + logger.error("No valid rows found in CSV file") + return + + logger.info(f"Processing {len(rows)} repository groups...") + + with ctx.obj.engine.begin() as connection: + # Get existing repo group IDs + df = pd.read_sql( + s.sql.text("SELECT repo_group_id FROM augur_data.repo_groups"), + connection, + ) + repo_group_IDs = df["repo_group_id"].values.tolist() + + insert_repo_group_sql = s.sql.text( + """ + INSERT INTO "augur_data"."repo_groups"("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_id, :repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP); """ - INSERT INTO "augur_data"."repo_groups"("repo_group_id", "rg_name", "rg_description", "rg_website", "rg_recache", "rg_last_modified", "rg_type", "tool_source", "tool_version", "data_source", "data_collection_date") VALUES (:repo_group_id, :repo_group_name, '', '', 0, CURRENT_TIMESTAMP, 'Unknown', 'Loaded by user', '1.0', 'Git', CURRENT_TIMESTAMP); - """ - ) + ) + + # Process each row + successful = 0 + rejections = [] + + for row in rows: + try: + group_id = int(row["repo_group_id"]) + group_name = row["repo_group_name"] + except (ValueError, KeyError) as e: + logger.warning(f"Invalid data format: {row}, error: {e}") + rejections.append((row, f"Invalid format: {e}")) + continue - with open(filename) as create_repo_groups_file: - data = csv.reader(create_repo_groups_file, delimiter=",") - for row in data: - # Handle case where there's a hanging empty row. - if not row: - logger.info("Skipping empty data...") + # Check if already exists + if group_id in repo_group_IDs: + logger.info(f"Repo group {group_id} already exists, skipping") continue - logger.info(f"Inserting repo group with values {row}...") - if int(row[0]) not in repo_group_IDs: - repo_group_IDs.append(int(row[0])) + try: + logger.info( + f"Inserting repo group: ID={group_id}, Name={group_name}" + ) connection.execute( insert_repo_group_sql.bindparams( - repo_group_id=int(row[0]), - repo_group_name=row[1], + repo_group_id=group_id, + repo_group_name=group_name, ) ) - else: - logger.info( - f"Repo group with ID {row[1]} for repo group {row[1]} already exists, skipping..." - ) + successful += 1 + repo_group_IDs.append(group_id) + except Exception as e: + logger.error(f"Failed to insert repo group {group_id}: {e}") + rejections.append((row, f"Database error: {e}")) + + logger.info(f"Successfully added {successful} repository groups") + + if rejections: + rejection_file = write_rejection_file(filename, rejections) + logger.warning( + f"{len(rejections)} groups failed. " + f"See {rejection_file} for details." 
+ ) + + except CSVProcessingError as e: + logger.error(f"CSV processing error: {e}") + return + except Exception as e: + logger.error(f"Unexpected error: {e}") + raise @cli.command("add-github-org") diff --git a/augur/application/schema/repo_group_load_sample.csv b/augur/application/schema/repo_group_load_sample.csv index ab27193e2a..8b5ada1413 100644 --- a/augur/application/schema/repo_group_load_sample.csv +++ b/augur/application/schema/repo_group_load_sample.csv @@ -1,2 +1,3 @@ +repo_group_id,repo_group_name 10,Repo Group 1 20,Repo Group 2 \ No newline at end of file diff --git a/augur/application/schema/repo_load_sample.csv b/augur/application/schema/repo_load_sample.csv index fb537d4949..ee11bb5ad5 100644 --- a/augur/application/schema/repo_load_sample.csv +++ b/augur/application/schema/repo_load_sample.csv @@ -1,3 +1,4 @@ +repo_url,repo_group_id https://github.com/chaoss/augur.git,10 https://github.com/chaoss/grimoirelab.git,10 https://github.com/chaoss/wg-evolution.git,20 diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv index 8001523560..463da48bde 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv @@ -1,2 +1,3 @@ +repo_group_id,repo_group_name 10,Repo Group 1 20,Repo Group 2 diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv index fb537d4949..ee11bb5ad5 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv @@ -1,3 +1,4 @@ +repo_url,repo_group_id https://github.com/chaoss/augur.git,10 https://github.com/chaoss/grimoirelab.git,10 https://github.com/chaoss/wg-evolution.git,20 From b2d061437cfbad68441496fdb061ba3c4f369c4e Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 5 Nov 2025 14:12:24 -0500 Subject: [PATCH 011/104] remove header rows from test CSV files for repo groups and repos Signed-off-by: Shlok Gilda --- .../test_facade_contributor_interface/test_repo_groups.csv | 1 - .../test_facade/test_facade_contributor_interface/test_repos.csv | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv index 463da48bde..8001523560 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repo_groups.csv @@ -1,3 +1,2 @@ -repo_group_id,repo_group_name 10,Repo Group 1 20,Repo Group 2 diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv index ee11bb5ad5..fb537d4949 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv @@ -1,4 +1,3 @@ -repo_url,repo_group_id https://github.com/chaoss/augur.git,10 https://github.com/chaoss/grimoirelab.git,10 https://github.com/chaoss/wg-evolution.git,20 From 515adc1bf5d8efd884d4b2d9accf3eee25d52bbf Mon Sep 17 00:00:00 
2001 From: Shlok Gilda Date: Wed, 5 Nov 2025 15:24:58 -0500 Subject: [PATCH 012/104] improve CSV processing error handling and logging in db commands Signed-off-by: Shlok Gilda --- augur/application/cli/csv_utils.py | 333 +++++++++++------------------ augur/application/cli/db.py | 26 +-- 2 files changed, 140 insertions(+), 219 deletions(-) diff --git a/augur/application/cli/csv_utils.py b/augur/application/cli/csv_utils.py index 2c0a2fcff3..af7821c35a 100644 --- a/augur/application/cli/csv_utils.py +++ b/augur/application/cli/csv_utils.py @@ -5,268 +5,195 @@ import csv import logging import os -from typing import Dict, List, Tuple logger = logging.getLogger(__name__) -# Constants MAX_FILE_SIZE_MB = 10 MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024 -class CSVProcessingError(Exception): - """Raised when CSV processing fails.""" +def validate_git_url(value: str) -> bool: + """Validate if value is a valid git repository URL""" - pass + from augur.application.db.models import Repo + + value = value.strip() + github_parse = Repo.parse_github_repo_url(value) + gitlab_parse = Repo.parse_gitlab_repo_url(value) + return github_parse != (None, None) or gitlab_parse != (None, None) -def check_file_size(filename: str) -> None: - """Validate file size is under limit""" - size = os.path.getsize(filename) - if size > MAX_FILE_SIZE_BYTES: - size_mb = size / (1024 * 1024) - raise CSVProcessingError( - f"File size ({size_mb:.1f}MB) exceeds {MAX_FILE_SIZE_MB}MB limit. " - f"Consider splitting into smaller batches." - ) +def validate_positive_int(value: str) -> bool: + """Validate if value is a positive integer""" + try: + return int(value.strip()) > 0 + except (ValueError, AttributeError): + return False -def detect_headers(first_row: List[str], expected_columns: set) -> bool: - """Detect if first row contains column headers""" - normalized = {col.strip().lower() for col in first_row} - return expected_columns.issubset(normalized) +def detect_column_order(sample_rows: list, validators: dict) -> dict: + """Detect column order by testing validators against sample data.""" -def detect_column_mapping_repos(rows: List[List[str]]) -> Dict[str, int]: - """Detect which column contains URLs vs IDs for headerless repo CSVs""" - from augur.application.db.models import Repo - - if not rows or len(rows[0]) != 2: - raise CSVProcessingError( - "Expected 2 columns (repo_url, repo_group_id). " - f"Found {len(rows[0]) if rows else 0} columns." + if not sample_rows or len(sample_rows[0]) != len(validators): + raise ValueError( + f"Expected {len(validators)} columns. " + f"Found {len(sample_rows[0]) if sample_rows else 0} columns." ) # Sample first 10 rows to determine column types - sample_size = min(10, len(rows)) - sample_rows = rows[:sample_size] - - # Test each column to see if it contains URLs - for col_idx in [0, 1]: - col_values = [row[col_idx] for row in sample_rows] - - # Count how many values in this column parse as valid git URLs - url_matches = 0 - for value in col_values: - value = value.strip() - github_parse = Repo.parse_github_repo_url(value) - gitlab_parse = Repo.parse_gitlab_repo_url(value) - - if github_parse != (None, None) or gitlab_parse != (None, None): - url_matches += 1 - - # If >80% of values are valid URLs, this is the URL column - match_rate = url_matches / len(col_values) - if match_rate >= 0.8: - url_col = col_idx - id_col = 1 - col_idx # The other column - return {"repo_url": url_col, "repo_group_id": id_col} - - raise CSVProcessingError( - "Could not detect column types. 
Ensure CSV contains valid git repository URLs. " - "Or add headers: repo_url,repo_group_id" - ) - + sample_size = min(10, len(sample_rows)) + sample_data = sample_rows[:sample_size] + + # Try to match each validator to a column using 80% threshold + column_mapping = {} + used_indices = set() + + for col_name, validator in validators.items(): + best_match_idx = None + + # Test each column + for col_idx in range(len(sample_data[0])): + if col_idx in used_indices: + continue + + # Count how many values in this column pass validation + matches = 0 + for row in sample_data: + if col_idx < len(row) and validator(row[col_idx]): + matches += 1 + + # If >80% of values pass validation, this is the correct column + match_rate = matches / len(sample_data) + if match_rate >= 0.8: + best_match_idx = col_idx + break + + if best_match_idx is not None: + column_mapping[col_name] = best_match_idx + used_indices.add(best_match_idx) + else: + # No match found for this column + raise ValueError( + f"Could not detect column '{col_name}'. " + f"Ensure CSV has valid format or add headers: {', '.join(validators.keys())}" + ) -def detect_column_mapping_repo_groups(rows: List[List[str]]) -> Dict[str, int]: - """Detect which column contains IDs vs names for headerless repo group CSVs""" - if not rows or len(rows[0]) != 2: - raise CSVProcessingError( - "Expected 2 columns (repo_group_id, repo_group_name). " - f"Found {len(rows[0]) if rows else 0} columns." - ) + return column_mapping - # Sample first 10 rows - sample_size = min(10, len(rows)) - sample_rows = rows[:sample_size] - - # Test each column to see if it contains integers - for col_idx in [0, 1]: - col_values = [row[col_idx] for row in sample_rows] - - # Count how many values are positive integers - int_matches = 0 - for value in col_values: - try: - if int(value.strip()) > 0: - int_matches += 1 - except (ValueError, AttributeError): - pass - - # If >80% of values are integers, this is the ID column - match_rate = int_matches / len(col_values) - if match_rate >= 0.8: - id_col = col_idx - name_col = 1 - col_idx # The other column - return {"repo_group_id": id_col, "repo_group_name": name_col} - - raise CSVProcessingError( - "Could not detect column types. Ensure CSV has valid format. " - "Or add headers: repo_group_id,repo_group_name" - ) +def process_csv(filename: str, expected_columns: dict) -> list: + """ + Generic CSV processor with header detection. -def process_repo_csv(filename: str) -> List[Dict[str, str]]: - """Process repository CSV file with intelligent header detection""" - check_file_size(filename) + Uses DictReader for both header and headerless CSVs by detecting column order + and reassigning fieldnames when necessary. + """ + + # Validate file size + size = os.path.getsize(filename) + if size > MAX_FILE_SIZE_BYTES: + size_mb = size / (1024 * 1024) + raise ValueError( + f"File size ({size_mb:.1f}MB) exceeds {MAX_FILE_SIZE_MB}MB limit. " + f"Consider splitting into smaller batches." 
+ ) rows = [] with open(filename, "r", newline="") as f: - # Read first line to detect headers - first_line = f.readline() - f.seek(0) - - first_row = next(csv.reader([first_line])) - has_headers = detect_headers(first_row, {"repo_url", "repo_group_id"}) - - if has_headers: - logger.info("CSV has headers, using DictReader") - reader = csv.DictReader(f) - - # Normalize fieldnames - reader.fieldnames = [fn.strip().lower() for fn in reader.fieldnames] - - # Validate required columns present - required = {"repo_url", "repo_group_id"} - if not required.issubset(set(reader.fieldnames)): - missing = required - set(reader.fieldnames) - raise CSVProcessingError( - f"Missing required columns: {missing}. " - f"Expected: repo_url, repo_group_id" - ) - - for line_num, row in enumerate(reader, start=2): - row_normalized = {k.strip().lower(): v.strip() for k, v in row.items()} - rows.append(row_normalized) + # Create DictReader - it will auto-read first row as fieldnames + reader = csv.DictReader(f) - else: - logger.info("CSV has no headers, using intelligent column detection") - # Read all rows - all_rows = list(csv.reader(f)) + # Check if auto-detected fieldnames are actual headers or data + detected_fieldnames = reader.fieldnames + if detected_fieldnames is None: + raise ValueError("CSV file is empty") - if not all_rows: - raise CSVProcessingError("CSV file is empty") + # Normalize and check if they match expected columns + normalized_fieldnames = {fn.strip().lower() for fn in detected_fieldnames} + expected_column_names = set(expected_columns.keys()) - # Detect which column is which - col_mapping = detect_column_mapping_repos(all_rows) - - # Convert to dicts - for line_num, row in enumerate(all_rows, start=1): - if len(row) != 2: - logger.warning( - f"Line {line_num}: Expected 2 columns, got {len(row)}, skipping" - ) - continue - - row_dict = { - "repo_url": row[col_mapping["repo_url"]].strip(), - "repo_group_id": row[col_mapping["repo_group_id"]].strip(), - } - rows.append(row_dict) - - logger.info(f"Parsed {len(rows)} rows from CSV") - return rows - - -def process_repo_group_csv(filename: str) -> List[Dict[str, str]]: - """Process repository group CSV file with intelligent header detection""" - check_file_size(filename) - - rows = [] - - with open(filename, "r", newline="") as f: - # Read first line to detect headers - first_line = f.readline() - f.seek(0) - - first_row = next(csv.reader([first_line])) - has_headers = detect_headers(first_row, {"repo_group_id", "repo_group_name"}) + has_headers = expected_column_names.issubset(normalized_fieldnames) if has_headers: + # Headers exist - proceed normally with DictReader logger.info("CSV has headers, using DictReader") - reader = csv.DictReader(f) - # Normalize fieldnames + # Normalize fieldnames for consistent access reader.fieldnames = [fn.strip().lower() for fn in reader.fieldnames] # Validate required columns present - required = {"repo_group_id", "repo_group_name"} - if not required.issubset(set(reader.fieldnames)): - missing = required - set(reader.fieldnames) - raise CSVProcessingError( + if not expected_column_names.issubset(set(reader.fieldnames)): + missing = expected_column_names - set(reader.fieldnames) + raise ValueError( f"Missing required columns: {missing}. 
" - f"Expected: repo_group_id, repo_group_name" + f"Expected: {', '.join(expected_column_names)}" ) - for line_num, row in enumerate(reader, start=2): + # Process all rows + for row in reader: row_normalized = {k.strip().lower(): v.strip() for k, v in row.items()} - - # Skip empty rows - if not row_normalized.get("repo_group_id") or not row_normalized.get( - "repo_group_name" - ): - continue - rows.append(row_normalized) else: + # No headers - detected_fieldnames are actually data logger.info("CSV has no headers, using intelligent column detection") - # Read all rows + + # We need to: + # 1. Read more rows to sample for column detection + # 2. Detect column order + # 3. Process first row (which is in detected_fieldnames) manually + # 4. Continue with remaining rows + + # Seek back to start and read all rows as raw data + f.seek(0) all_rows = list(csv.reader(f)) if not all_rows: - raise CSVProcessingError("CSV file is empty") + raise ValueError("CSV file is empty") - # Detect which column is which - col_mapping = detect_column_mapping_repo_groups(all_rows) + # Detect column order using sample rows + col_mapping = detect_column_order(all_rows, expected_columns) - # Convert to dicts - for line_num, row in enumerate(all_rows, start=1): - if len(row) != 2: + # Process all rows with detected column order + for row in all_rows: + if len(row) != len(expected_columns): logger.warning( - f"Line {line_num}: Expected 2 columns, got {len(row)}, skipping" + f"Expected {len(expected_columns)} columns, got {len(row)}, skipping" ) continue - # Skip empty rows - if not row[0].strip() or not row[1].strip(): - continue + # Build dict using detected column mapping + row_dict = {} + for col_name, col_idx in col_mapping.items(): + row_dict[col_name] = row[col_idx].strip() - row_dict = { - "repo_group_id": row[col_mapping["repo_group_id"]].strip(), - "repo_group_name": row[col_mapping["repo_group_name"]].strip(), - } rows.append(row_dict) logger.info(f"Parsed {len(rows)} rows from CSV") return rows -def write_rejection_file(filename: str, rejections: List[Tuple[Dict, str]]) -> str: - """Write rejected rows to a .rejected.csv file""" - if not rejections: - return None - - rejection_file = f"{filename}.rejected.csv" +def process_repo_csv(filename: str) -> list: + """Process repository CSV file with intelligent header detection""" - with open(rejection_file, "w", newline="") as f: - writer = csv.writer(f) - writer.writerow(["original_data", "rejection_reason"]) + return process_csv( + filename, + expected_columns={ + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + }, + ) - for row_dict, reason in rejections: - original_data = ",".join(str(v) for v in row_dict.values()) - writer.writerow([original_data, reason]) - logger.info(f"Wrote {len(rejections)} rejections to {rejection_file}") - return rejection_file +def process_repo_group_csv(filename: str) -> list: + """Process repository group CSV file with intelligent header detection""" + + return process_csv( + filename, + expected_columns={ + "repo_group_id": validate_positive_int, + "repo_group_name": lambda v: bool(v.strip()), + }, + ) diff --git a/augur/application/cli/db.py b/augur/application/cli/db.py index 20fec42412..e0df763dab 100644 --- a/augur/application/cli/db.py +++ b/augur/application/cli/db.py @@ -27,8 +27,6 @@ from augur.application.cli.csv_utils import ( process_repo_csv, process_repo_group_csv, - write_rejection_file, - CSVProcessingError, ) logger = logging.getLogger(__name__) @@ -79,7 +77,7 @@ def add_repos(ctx, filename): 
successful = 0 rejections = [] - for row in rows: + for idx, row in enumerate(rows, start=1): try: repo_data = { "url": row["repo_url"], @@ -91,7 +89,7 @@ def add_repos(ctx, filename): continue print( - f"Inserting repo with Git URL `{repo_data['url']}` into repo group {repo_data['repo_group_id']}" + f"Inserting repo {idx}/{len(rows)} with Git URL `{repo_data['url']}` into repo group {repo_data['repo_group_id']}" ) succeeded, message = controller.add_cli_repo(repo_data) @@ -106,13 +104,11 @@ def add_repos(ctx, filename): logger.info(f"Successfully added {successful} repositories") if rejections: - rejection_file = write_rejection_file(filename, rejections) - logger.warning( - f"{len(rejections)} repositories failed. " - f"See {rejection_file} for details." - ) + logger.warning(f"{len(rejections)} repositories failed:") + for row_data, reason in rejections: + logger.warning(f" - {row_data}: {reason}") - except CSVProcessingError as e: + except ValueError as e: logger.error(f"CSV processing error: {e}") return except Exception as e: @@ -213,13 +209,11 @@ def add_repo_groups(ctx, filename): logger.info(f"Successfully added {successful} repository groups") if rejections: - rejection_file = write_rejection_file(filename, rejections) - logger.warning( - f"{len(rejections)} groups failed. " - f"See {rejection_file} for details." - ) + logger.warning(f"{len(rejections)} repository groups failed:") + for row_data, reason in rejections: + logger.warning(f" - {row_data}: {reason}") - except CSVProcessingError as e: + except ValueError as e: logger.error(f"CSV processing error: {e}") return except Exception as e: From b42333f62591726c7533b933676ba8a7c329c749 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Sat, 8 Nov 2025 14:28:03 -0500 Subject: [PATCH 013/104] enhance type annotations and docstrings for CSV processing functions in cli Signed-off-by: Shlok Gilda --- augur/application/cli/csv_utils.py | 105 ++++++++++++++++++++++++----- augur/application/cli/db.py | 30 +++++++-- 2 files changed, 112 insertions(+), 23 deletions(-) diff --git a/augur/application/cli/csv_utils.py b/augur/application/cli/csv_utils.py index af7821c35a..e55835f6e5 100644 --- a/augur/application/cli/csv_utils.py +++ b/augur/application/cli/csv_utils.py @@ -5,6 +5,7 @@ import csv import logging import os +from typing import Callable logger = logging.getLogger(__name__) @@ -13,10 +14,19 @@ def validate_git_url(value: str) -> bool: - """Validate if value is a valid git repository URL""" + """Validate if value is a valid git repository URL. + Checks if the provided string is a valid GitHub or GitLab repository URL + using the Repo model's URL parsing methods. + + Args: + value: String to validate as a git repository URL + + Returns: + True if the value is a valid GitHub or GitLab URL, False otherwise + """ from augur.application.db.models import Repo - + value = value.strip() github_parse = Repo.parse_github_repo_url(value) gitlab_parse = Repo.parse_gitlab_repo_url(value) @@ -24,17 +34,40 @@ def validate_git_url(value: str) -> bool: def validate_positive_int(value: str) -> bool: - """Validate if value is a positive integer""" + """Validate if value is a positive integer. 
+ + Args: + value: String to validate as a positive integer + Returns: + True if the value can be converted to a positive integer, False otherwise + """ try: return int(value.strip()) > 0 except (ValueError, AttributeError): return False -def detect_column_order(sample_rows: list, validators: dict) -> dict: - """Detect column order by testing validators against sample data.""" +def detect_column_order( + sample_rows: list[list[str]], validators: dict[str, Callable[[str], bool]] +) -> dict[str, int]: + """Detect column order by testing validators against sample data. + For headerless CSV files, this function determines which column index + corresponds to which expected field by validating sample data against + validator functions. Uses an 80% threshold for matching. + + Args: + sample_rows: List of rows from CSV file, where each row is a list of strings + validators: Dictionary mapping column names to validator functions + + Returns: + Dictionary mapping column names to their detected column indices + + Raises: + ValueError: If column count doesn't match expected validators or if + a column cannot be detected with sufficient confidence + """ if not sample_rows or len(sample_rows[0]) != len(validators): raise ValueError( f"Expected {len(validators)} columns. " @@ -82,14 +115,28 @@ def detect_column_order(sample_rows: list, validators: dict) -> dict: return column_mapping -def process_csv(filename: str, expected_columns: dict) -> list: - """ - Generic CSV processor with header detection. +def process_csv( + filename: str, expected_columns: dict[str, Callable[[str], bool]] +) -> list[dict[str, str]]: + """Generic CSV processor with header detection. + + Processes CSV files with or without headers by automatically detecting the + column order. For files with headers, uses DictReader directly. For headerless + files, detects column order by validating sample data against expected validators. - Uses DictReader for both header and headerless CSVs by detecting column order - and reassigning fieldnames when necessary. + Args: + filename: Path to the CSV file to process + expected_columns: Dictionary mapping column names to validator functions + that check if a value is valid for that column + + Returns: + List of dictionaries, where each dictionary represents a row with + column names as keys and cell values as strings + + Raises: + ValueError: If file is empty, exceeds size limit, has wrong number of + columns, or missing required headers """ - # Validate file size size = os.path.getsize(filename) if size > MAX_FILE_SIZE_BYTES: @@ -99,7 +146,7 @@ def process_csv(filename: str, expected_columns: dict) -> list: f"Consider splitting into smaller batches." ) - rows = [] + rows: list[dict[str, str]] = [] with open(filename, "r", newline="") as f: # Create DictReader - it will auto-read first row as fieldnames @@ -175,9 +222,21 @@ def process_csv(filename: str, expected_columns: dict) -> list: return rows -def process_repo_csv(filename: str) -> list: - """Process repository CSV file with intelligent header detection""" +def process_repo_csv(filename: str) -> list[dict[str, str]]: + """Process repository CSV file with header detection. + + Processes a CSV file containing repository information with columns for + repo_url and repo_group_id. Supports both header and headerless formats. 
+ Args: + filename: Path to the repository CSV file + + Returns: + List of dictionaries with keys 'repo_url' and 'repo_group_id' + + Raises: + ValueError: If file format is invalid or columns cannot be detected + """ return process_csv( filename, expected_columns={ @@ -187,9 +246,21 @@ def process_repo_csv(filename: str) -> list: ) -def process_repo_group_csv(filename: str) -> list: - """Process repository group CSV file with intelligent header detection""" - +def process_repo_group_csv(filename: str) -> list[dict[str, str]]: + """Process repository group CSV file with header detection. + + Processes a CSV file containing repository group information with columns + for repo_group_id and repo_group_name. Supports both header and headerless formats. + + Args: + filename: Path to the repository group CSV file + + Returns: + List of dictionaries with keys 'repo_group_id' and 'repo_group_name' + + Raises: + ValueError: If file format is invalid or columns cannot be detected + """ return process_csv( filename, expected_columns={ diff --git a/augur/application/cli/db.py b/augur/application/cli/db.py index e0df763dab..8d5408eaea 100644 --- a/augur/application/cli/db.py +++ b/augur/application/cli/db.py @@ -44,7 +44,7 @@ def cli(ctx): @test_db_connection @with_database @click.pass_context -def add_repos(ctx, filename): +def add_repos(ctx: click.Context, filename: str) -> None: """Add repositories to Augur's database from a CSV file. The CSV file can have headers (recommended): @@ -56,7 +56,18 @@ def add_repos(ctx, filename): NOTE: The Group ID must already exist in the REPO_Groups Table. - If you want to add an entire GitHub organization, refer to the command: augur db add-github-org""" + Args: + ctx: Click context object containing the database engine + filename: Path to the CSV file containing repository data + + Raises: + ValueError: If CSV file is malformed or exceeds size limit + Exception: For database connection or other unexpected errors + + Note: + If you want to add an entire GitHub organization, refer to the + command: augur db add-github-org + """ from augur.tasks.github.util.github_task_session import GithubTaskSession from augur.util.repo_load_controller import RepoLoadController @@ -121,7 +132,7 @@ def add_repos(ctx, filename): @test_db_connection @with_database @click.pass_context -def get_repo_groups(ctx): +def get_repo_groups(ctx: click.Context) -> pd.DataFrame: """ List all repo groups and their associated IDs """ @@ -144,9 +155,16 @@ def get_repo_groups(ctx): @test_db_connection @with_database @click.pass_context -def add_repo_groups(ctx, filename): - """ - Create new repo groups in Augur's database +def add_repo_groups(ctx: click.Context, filename: str) -> None: + """Create new repo groups in Augur's database from a CSV file. 
+ + Args: + ctx: Click context object containing the database engine + filename: Path to the CSV file containing repository group data + + Raises: + ValueError: If CSV file is malformed or exceeds size limit + Exception: For database connection or other unexpected errors """ try: # Parse CSV (handles headers and column detection) From 9fff7d8ad766b1387b67145165c8af04abd9c069 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Sat, 8 Nov 2025 14:28:11 -0500 Subject: [PATCH 014/104] add unit tests for CSV processing utilities including validation and error handling Signed-off-by: Shlok Gilda --- .../test_cli/test_csv_utils.py | 357 ++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 tests/test_application/test_cli/test_csv_utils.py diff --git a/tests/test_application/test_cli/test_csv_utils.py b/tests/test_application/test_cli/test_csv_utils.py new file mode 100644 index 0000000000..c584345ced --- /dev/null +++ b/tests/test_application/test_cli/test_csv_utils.py @@ -0,0 +1,357 @@ +# SPDX-License-Identifier: MIT +"""Unit tests for CSV processing utilities""" + +import pytest +from unittest.mock import patch + +from augur.application.cli.csv_utils import ( + validate_git_url, + validate_positive_int, + detect_column_order, + process_csv, + process_repo_csv, + process_repo_group_csv, + MAX_FILE_SIZE_BYTES, +) + + +class TestValidateGitUrl: + """Tests for validate_git_url function""" + + def test_valid_github_url(self): + """Test validation of valid GitHub URLs""" + assert validate_git_url("https://github.com/chaoss/augur") + assert validate_git_url("https://github.com/chaoss/augur.git") + assert validate_git_url(" https://github.com/chaoss/augur ") # with whitespace + + def test_valid_gitlab_url(self): + """Test validation of valid GitLab URLs""" + assert validate_git_url("https://gitlab.com/chaoss/augur") + assert validate_git_url("https://gitlab.com/chaoss/augur.git") + + def test_invalid_url(self): + """Test validation of invalid URLs""" + assert not validate_git_url("not-a-url") + assert not validate_git_url("https://example.com") + assert not validate_git_url("123") + assert not validate_git_url("") + + def test_whitespace_handling(self): + """Test that whitespace is properly stripped""" + assert validate_git_url(" https://github.com/chaoss/augur ") + + +class TestValidatePositiveInt: + """Tests for validate_positive_int function""" + + def test_valid_positive_integers(self): + """Test validation of valid positive integers""" + assert validate_positive_int("1") + assert validate_positive_int("42") + assert validate_positive_int("9999") + assert validate_positive_int(" 123 ") # with whitespace + + def test_zero_is_invalid(self): + """Test that zero is not considered a positive integer""" + assert not validate_positive_int("0") + + def test_negative_numbers_invalid(self): + """Test that negative numbers are invalid""" + assert not validate_positive_int("-1") + assert not validate_positive_int("-42") + + def test_non_numeric_invalid(self): + """Test that non-numeric strings are invalid""" + assert not validate_positive_int("abc") + assert not validate_positive_int("12.5") + assert not validate_positive_int("") + assert not validate_positive_int("1a") + + def test_whitespace_handling(self): + """Test that whitespace is properly stripped""" + assert validate_positive_int(" 42 ") + + +class TestDetectColumnOrder: + """Tests for detect_column_order function""" + + def test_simple_column_detection(self): + """Test basic column order detection""" + sample_rows = [ + 
["https://github.com/chaoss/augur", "10"], + ["https://github.com/user/repo", "20"], + ] + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = detect_column_order(sample_rows, validators) + assert result == {"repo_url": 0, "repo_group_id": 1} + + def test_reversed_column_order(self): + """Test detection with reversed column order""" + sample_rows = [ + ["10", "https://github.com/chaoss/augur"], + ["20", "https://github.com/user/repo"], + ] + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = detect_column_order(sample_rows, validators) + assert result == {"repo_url": 1, "repo_group_id": 0} + + def test_threshold_detection(self): + """Test that detection uses 80% threshold correctly""" + # 8 out of 10 rows valid (80% exactly) + sample_rows = [ + ["https://github.com/chaoss/augur", "10"], + ["https://github.com/user/repo1", "20"], + ["https://github.com/user/repo2", "30"], + ["https://github.com/user/repo3", "40"], + ["https://github.com/user/repo4", "50"], + ["https://github.com/user/repo5", "60"], + ["https://github.com/user/repo6", "70"], + ["https://github.com/user/repo7", "80"], + ["invalid-url", "90"], # Invalid + ["also-invalid", "100"], # Invalid + ] + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = detect_column_order(sample_rows, validators) + assert result == {"repo_url": 0, "repo_group_id": 1} + + def test_empty_rows_raises_error(self): + """Test that empty sample rows raises ValueError""" + with pytest.raises(ValueError, match="Expected .* columns"): + detect_column_order([], {"col1": lambda x: True}) + + def test_wrong_column_count_raises_error(self): + """Test that wrong column count raises ValueError""" + sample_rows = [["val1", "val2", "val3"]] + validators = {"col1": lambda x: True, "col2": lambda x: True} + + with pytest.raises(ValueError, match="Expected 2 columns.*Found 3"): + detect_column_order(sample_rows, validators) + + def test_no_match_found_raises_error(self): + """Test that failure to detect a column raises ValueError""" + sample_rows = [ + ["invalid", "invalid"], + ["invalid", "invalid"], + ] + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + with pytest.raises(ValueError, match="Could not detect column"): + detect_column_order(sample_rows, validators) + + +class TestProcessCsv: + """Tests for process_csv function""" + + def test_csv_with_headers(self, tmp_path): + """Test processing CSV file with headers""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("repo_url,repo_group_id\nhttps://github.com/chaoss/augur,10\nhttps://github.com/user/repo,20") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 2 + assert result[0] == {"repo_url": "https://github.com/chaoss/augur", "repo_group_id": "10"} + assert result[1] == {"repo_url": "https://github.com/user/repo", "repo_group_id": "20"} + + def test_csv_without_headers(self, tmp_path): + """Test processing CSV file without headers""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("https://github.com/chaoss/augur,10\nhttps://github.com/user/repo,20") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 2 + assert result[0] == {"repo_url": 
"https://github.com/chaoss/augur", "repo_group_id": "10"} + assert result[1] == {"repo_url": "https://github.com/user/repo", "repo_group_id": "20"} + + def test_csv_with_different_column_order(self, tmp_path): + """Test processing CSV with columns in different order""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("repo_group_id,repo_url\n10,https://github.com/chaoss/augur") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 1 + assert result[0] == {"repo_url": "https://github.com/chaoss/augur", "repo_group_id": "10"} + + def test_empty_csv_raises_error(self, tmp_path): + """Test that empty CSV file raises ValueError""" + csv_file = tmp_path / "empty.csv" + csv_file.write_text("") + + validators = {"col1": lambda x: True} + + with pytest.raises(ValueError, match="empty"): + process_csv(str(csv_file), validators) + + def test_file_size_limit_with_mock(self, tmp_path): + """Test file size limit enforcement using mock""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("repo_url,repo_group_id\nhttps://github.com/chaoss/augur,10") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + # Mock os.path.getsize to return a size larger than limit + with patch('os.path.getsize', return_value=MAX_FILE_SIZE_BYTES + 1): + with pytest.raises(ValueError, match="exceeds.*limit"): + process_csv(str(csv_file), validators) + + def test_missing_required_headers_raises_error(self, tmp_path): + """Test that missing required headers raises ValueError""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("wrong_column,another_column\nvalue1,value2") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + with pytest.raises(ValueError, match="Could not detect column"): + process_csv(str(csv_file), validators) + + def test_whitespace_in_values(self, tmp_path): + """Test that whitespace in values is properly stripped""" + csv_file = tmp_path / "test.csv" + csv_file.write_text("repo_url,repo_group_id\n https://github.com/chaoss/augur , 10 ") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert result[0] == {"repo_url": "https://github.com/chaoss/augur", "repo_group_id": "10"} + + +class TestProcessRepoCsv: + """Tests for process_repo_csv function""" + + def test_process_valid_repo_csv(self, tmp_path): + """Test processing a valid repository CSV""" + csv_file = tmp_path / "repos.csv" + csv_file.write_text("repo_url,repo_group_id\nhttps://github.com/chaoss/augur,10") + + result = process_repo_csv(str(csv_file)) + assert len(result) == 1 + assert result[0]["repo_url"] == "https://github.com/chaoss/augur" + assert result[0]["repo_group_id"] == "10" + + def test_process_repo_csv_without_headers(self, tmp_path): + """Test processing repository CSV without headers""" + csv_file = tmp_path / "repos.csv" + csv_file.write_text("https://github.com/chaoss/augur,10\nhttps://github.com/user/repo,20") + + result = process_repo_csv(str(csv_file)) + assert len(result) == 2 + + +class TestProcessRepoGroupCsv: + """Tests for process_repo_group_csv function""" + + def test_process_valid_repo_group_csv(self, tmp_path): + """Test processing a valid repository group CSV""" + csv_file = tmp_path / "groups.csv" + csv_file.write_text("repo_group_id,repo_group_name\n10,CHAOSS") + + result = 
process_repo_group_csv(str(csv_file)) + assert len(result) == 1 + assert result[0]["repo_group_id"] == "10" + assert result[0]["repo_group_name"] == "CHAOSS" + + def test_process_repo_group_csv_without_headers(self, tmp_path): + """Test processing repository group CSV without headers""" + csv_file = tmp_path / "groups.csv" + csv_file.write_text("10,CHAOSS\n20,OpenSource") + + result = process_repo_group_csv(str(csv_file)) + assert len(result) == 2 + assert result[0]["repo_group_name"] == "CHAOSS" + assert result[1]["repo_group_name"] == "OpenSource" + + def test_empty_group_name_invalid(self, tmp_path): + """Test that empty repository group names are handled""" + csv_file = tmp_path / "groups.csv" + csv_file.write_text("repo_group_id,repo_group_name\n10,ValidName\n20,") + + # This should process the file, but the row with empty name should fail validation + # during the detect_column_order phase if there aren't enough valid rows + result = process_repo_group_csv(str(csv_file)) + # Both rows should be parsed; validation happens at application level + assert len(result) >= 1 + + +class TestEdgeCases: + """Tests for edge cases and error conditions""" + + def test_single_row_csv(self, tmp_path): + """Test processing CSV with single row""" + csv_file = tmp_path / "single.csv" + csv_file.write_text("https://github.com/chaoss/augur,10") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 1 + + def test_csv_with_extra_whitespace_in_headers(self, tmp_path): + """Test CSV with whitespace in header names""" + csv_file = tmp_path / "test.csv" + csv_file.write_text(" repo_url , repo_group_id \nhttps://github.com/chaoss/augur,10") + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 1 + assert result[0]["repo_url"] == "https://github.com/chaoss/augur" + + def test_many_rows_csv(self, tmp_path): + """Test processing CSV with many rows""" + csv_file = tmp_path / "many.csv" + lines = ["repo_url,repo_group_id"] + for i in range(100): + lines.append(f"https://github.com/user/repo{i},{i+1}") + csv_file.write_text("\n".join(lines)) + + validators = { + "repo_url": validate_git_url, + "repo_group_id": validate_positive_int, + } + + result = process_csv(str(csv_file), validators) + assert len(result) == 100 From 15b2dcc6e7838b30cd360d19cb8f36e1c3def94b Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 12 Nov 2025 13:22:06 -0500 Subject: [PATCH 015/104] move RepoLoadController within the database session context Signed-off-by: Shlok Gilda --- augur/application/db/models/augur_operations.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index bb29c7571a..45ac1d8167 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -566,9 +566,7 @@ def get_group_repo_count(self, group_name, search = None): from augur.util.repo_load_controller import RepoLoadController with DatabaseSession(logger) as session: - controller = RepoLoadController(session) - - result = controller.get_repo_count(source="group", group_name=group_name, user=self, search=search) + result = RepoLoadController(session).get_repo_count(source="group", group_name=group_name, user=self, search=search) return result From 
382e7b7f581833189b3e5dd64b6acb2546942ccd Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Thu, 13 Nov 2025 20:14:19 +0000 Subject: [PATCH 016/104] Add TopicModelEvent ORM model to augur_data.py Migration 36 created the topic_model_event table in the database, but the corresponding SQLAlchemy model was not added to augur_data.py. This caused ORM-level access to the event table to fail. This commit adds the TopicModelEvent class with: - All table columns (event_id, ts, repo_id, model_id, event, level, payload) - Index definitions for ix_tme_repo_ts and ix_tme_event - Foreign key constraints to repo and topic_model_meta tables - Relationship mappings to Repo and TopicModelMeta models This enables the application to query and manipulate topic modeling events through the ORM layer. Related: augur/application/schema/alembic/versions/36_add_topic_model_event.py Signed-off-by: Xiaoha --- augur/application/db/models/augur_data.py | 55 +++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index ddf11e0532..921f9f5336 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -3705,3 +3705,58 @@ class TopicModelMeta(Base): ) repo = relationship("Repo") + + +class TopicModelEvent(Base): + __tablename__ = "topic_model_event" + __table_args__ = ( + Index("ix_tme_repo_ts", "repo_id", "ts"), + Index("ix_tme_event", "event"), + {"schema": "augur_data"} + ) + + event_id = Column( + BigInteger, + primary_key=True, + comment="Unique identifier for the event" + ) + ts = Column( + TIMESTAMP(timezone=True), + nullable=False, + server_default=text("CURRENT_TIMESTAMP"), + comment="Timestamp when the event occurred" + ) + repo_id = Column( + ForeignKey("augur_data.repo.repo_id", name="fk_tme_repo_id"), + nullable=True, + comment="Repository associated with this event" + ) + model_id = Column( + UUID(as_uuid=True), + ForeignKey( + "augur_data.topic_model_meta.model_id", + name="fk_tme_model_id", + ondelete="SET NULL" + ), + nullable=True, + comment="Topic model associated with this event" + ) + event = Column( + Text, + nullable=False, + comment="Event type or name" + ) + level = Column( + Text, + nullable=False, + server_default=text("'INFO'"), + comment="Log level (INFO, WARNING, ERROR, etc.)" + ) + payload = Column( + JSONB, + nullable=False, + comment="Event payload data" + ) + + repo = relationship("Repo") + topic_model = relationship("TopicModelMeta") From 2f83a2e05e2453f86011ff284cf698cf131c4a22 Mon Sep 17 00:00:00 2001 From: Xiaoha Date: Thu, 13 Nov 2025 20:22:18 +0000 Subject: [PATCH 017/104] Add explicit Integer type to repo_id column Ensure repo_id column type matches migration definition (sa.Integer) for complete schema consistency between ORM and database. 
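With the TopicModelEvent mapping declared in the previous commit and its repo_id type pinned down here, topic-model events can be read through the ORM. A minimal query sketch follows; it is illustrative only, the helper name and the import path are assumptions, and it presumes a SQLAlchemy session such as the DatabaseSession Augur already provides.

# Hypothetical helper (not part of these patches): fetch the newest
# topic-model events recorded for a single repository via the ORM.
from sqlalchemy import select

from augur.application.db.models.augur_data import TopicModelEvent  # assumed import path


def recent_topic_model_events(session, repo_id, limit=20):
    """Return up to `limit` of the most recent topic_model_event rows for a repo."""
    stmt = (
        select(TopicModelEvent)
        .where(TopicModelEvent.repo_id == repo_id)
        .order_by(TopicModelEvent.ts.desc())
        .limit(limit)
    )
    return session.scalars(stmt).all()

Filtering on repo_id and ordering by ts also lines up with the ix_tme_repo_ts index the model declares.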
Signed-off-by: Xiaoha --- augur/application/db/models/augur_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 921f9f5336..034a2bec01 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -3727,6 +3727,7 @@ class TopicModelEvent(Base): comment="Timestamp when the event occurred" ) repo_id = Column( + Integer, ForeignKey("augur_data.repo.repo_id", name="fk_tme_repo_id"), nullable=True, comment="Repository associated with this event" From 6a406fd929abc574ea98a41edc06d9ebc8e0a63a Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 14 Nov 2025 17:11:45 -0500 Subject: [PATCH 018/104] Create a migration to synchronize the topic model tables Signed-off-by: Adrian Edwards --- .../37_sync_topic_model_migrations.py | 361 ++++++++++++++++++ 1 file changed, 361 insertions(+) create mode 100644 augur/application/schema/alembic/versions/37_sync_topic_model_migrations.py diff --git a/augur/application/schema/alembic/versions/37_sync_topic_model_migrations.py b/augur/application/schema/alembic/versions/37_sync_topic_model_migrations.py new file mode 100644 index 0000000000..6a076a750d --- /dev/null +++ b/augur/application/schema/alembic/versions/37_sync_topic_model_migrations.py @@ -0,0 +1,361 @@ +"""sync topic model migrations because Revisions 35 and 36 did not perfectly match their associated SQLAlchemy class models. + +Revision ID: 37 +Revises: 36 +Create Date: 2025-11-14 17:09:14.156057 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '37' +down_revision = '36' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column('topic_model_event', 'event_id', + existing_type=sa.BIGINT(), + comment='Unique identifier for the event', + existing_nullable=False, + autoincrement=True, + schema='augur_data') + op.alter_column('topic_model_event', 'ts', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment='Timestamp when the event occurred', + existing_nullable=False, + existing_server_default=sa.text('CURRENT_TIMESTAMP'), + schema='augur_data') + op.alter_column('topic_model_event', 'repo_id', + existing_type=sa.INTEGER(), + comment='Repository associated with this event', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_event', 'model_id', + existing_type=sa.UUID(), + comment='Topic model associated with this event', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_event', 'event', + existing_type=sa.TEXT(), + comment='Event type or name', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_event', 'level', + existing_type=sa.TEXT(), + comment='Log level (INFO, WARNING, ERROR, etc.)', + existing_nullable=False, + existing_server_default=sa.text("'INFO'::text"), + schema='augur_data') + op.alter_column('topic_model_event', 'payload', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment='Event payload data', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_id', + existing_type=sa.UUID(), + comment='Unique identifier for the topic model', + existing_nullable=False, + existing_server_default=sa.text('gen_random_uuid()'), + schema='augur_data') + op.alter_column('topic_model_meta', 'repo_id', + existing_type=sa.INTEGER(), + type_=sa.BigInteger(), + comment='Repository this model was trained on', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_method', + existing_type=sa.VARCHAR(), + comment="Method used for topic modeling (e.g., 'NMF_COUNT', 'LDA_TFIDF')", + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'num_topics', + existing_type=sa.INTEGER(), + comment='Number of topics in the model', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'num_words_per_topic', + existing_type=sa.INTEGER(), + comment='Number of words per topic', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'training_parameters', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + type_=sa.JSON(), + comment='JSON object containing training parameters', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_file_paths', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + type_=sa.JSON(), + comment='JSON object containing paths to model artifacts', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'parameters_hash', + existing_type=sa.VARCHAR(), + comment='Hash of parameters for deduplication', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'coherence_score', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment='Coherence score of the model', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + op.alter_column('topic_model_meta', 'perplexity_score', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment='Perplexity score of the model', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + 
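+    # The remaining alter_column calls repeat the same pattern for the rest of
+    # the topic_model_meta columns: attach the column comments declared on the
+    # ORM model, switch JSONB columns to the generic sa.JSON type, and relax
+    # the standard metadata columns to nullable so the table definition lines
+    # up with the SQLAlchemy models.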
op.alter_column('topic_model_meta', 'topic_diversity', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment='Topic diversity score', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + op.alter_column('topic_model_meta', 'quality', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + type_=sa.JSON(), + comment='Quality metrics', + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + schema='augur_data') + op.alter_column('topic_model_meta', 'training_message_count', + existing_type=sa.BIGINT(), + comment='Number of messages used for training', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'data_fingerprint', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + type_=sa.JSON(), + comment='Fingerprint of training data', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'visualization_data', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + type_=sa.JSON(), + comment='JSON object containing visualization data for the model', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_meta', 'training_start_time', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment='When training started', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'training_end_time', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment='When training ended', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'tool_source', + existing_type=sa.VARCHAR(), + nullable=True, + comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'tool_version', + existing_type=sa.VARCHAR(), + nullable=True, + comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'data_source', + existing_type=sa.VARCHAR(), + nullable=True, + comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'data_collection_date', + existing_type=postgresql.TIMESTAMP(timezone=True), + nullable=True, + existing_server_default=sa.text('CURRENT_TIMESTAMP'), + schema='augur_data') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column('topic_model_meta', 'data_collection_date', + existing_type=postgresql.TIMESTAMP(timezone=True), + nullable=False, + existing_server_default=sa.text('CURRENT_TIMESTAMP'), + schema='augur_data') + op.alter_column('topic_model_meta', 'data_source', + existing_type=sa.VARCHAR(), + nullable=False, + comment=None, + existing_comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'tool_version', + existing_type=sa.VARCHAR(), + nullable=False, + comment=None, + existing_comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'tool_source', + existing_type=sa.VARCHAR(), + nullable=False, + comment=None, + existing_comment='Standard Augur Metadata', + schema='augur_data') + op.alter_column('topic_model_meta', 'training_end_time', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment=None, + existing_comment='When training ended', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'training_start_time', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment=None, + existing_comment='When training started', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'visualization_data', + existing_type=sa.JSON(), + type_=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='JSON object containing visualization data for the model', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_meta', 'data_fingerprint', + existing_type=sa.JSON(), + type_=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='Fingerprint of training data', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'training_message_count', + existing_type=sa.BIGINT(), + comment=None, + existing_comment='Number of messages used for training', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'quality', + existing_type=sa.JSON(), + type_=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='Quality metrics', + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + schema='augur_data') + op.alter_column('topic_model_meta', 'topic_diversity', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment='Topic diversity score', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + op.alter_column('topic_model_meta', 'perplexity_score', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment='Perplexity score of the model', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + op.alter_column('topic_model_meta', 'coherence_score', + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment='Coherence score of the model', + existing_nullable=False, + existing_server_default=sa.text('0.0'), + schema='augur_data') + op.alter_column('topic_model_meta', 'parameters_hash', + existing_type=sa.VARCHAR(), + comment=None, + existing_comment='Hash of parameters for deduplication', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_file_paths', + existing_type=sa.JSON(), + type_=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='JSON object containing paths to model artifacts', + existing_nullable=False, + schema='augur_data') + 
op.alter_column('topic_model_meta', 'training_parameters', + existing_type=sa.JSON(), + type_=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='JSON object containing training parameters', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'num_words_per_topic', + existing_type=sa.INTEGER(), + comment=None, + existing_comment='Number of words per topic', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'num_topics', + existing_type=sa.INTEGER(), + comment=None, + existing_comment='Number of topics in the model', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_method', + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Method used for topic modeling (e.g., 'NMF_COUNT', 'LDA_TFIDF')", + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_meta', 'repo_id', + existing_type=sa.BigInteger(), + type_=sa.INTEGER(), + comment=None, + existing_comment='Repository this model was trained on', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_meta', 'model_id', + existing_type=sa.UUID(), + comment=None, + existing_comment='Unique identifier for the topic model', + existing_nullable=False, + existing_server_default=sa.text('gen_random_uuid()'), + schema='augur_data') + op.alter_column('topic_model_event', 'payload', + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment='Event payload data', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_event', 'level', + existing_type=sa.TEXT(), + comment=None, + existing_comment='Log level (INFO, WARNING, ERROR, etc.)', + existing_nullable=False, + existing_server_default=sa.text("'INFO'::text"), + schema='augur_data') + op.alter_column('topic_model_event', 'event', + existing_type=sa.TEXT(), + comment=None, + existing_comment='Event type or name', + existing_nullable=False, + schema='augur_data') + op.alter_column('topic_model_event', 'model_id', + existing_type=sa.UUID(), + comment=None, + existing_comment='Topic model associated with this event', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_event', 'repo_id', + existing_type=sa.INTEGER(), + comment=None, + existing_comment='Repository associated with this event', + existing_nullable=True, + schema='augur_data') + op.alter_column('topic_model_event', 'ts', + existing_type=postgresql.TIMESTAMP(timezone=True), + comment=None, + existing_comment='Timestamp when the event occurred', + existing_nullable=False, + existing_server_default=sa.text('CURRENT_TIMESTAMP'), + schema='augur_data') + op.alter_column('topic_model_event', 'event_id', + existing_type=sa.BIGINT(), + comment=None, + existing_comment='Unique identifier for the event', + existing_nullable=False, + autoincrement=True, + schema='augur_data') + # ### end Alembic commands ### From 17963e42c3f91d795e3766ed1511a2b0dd88d985 Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Sat, 15 Nov 2025 10:32:19 +0530 Subject: [PATCH 019/104] Fix: collection_intervals into seconds Signed-off-by: PredictiveManish --- docs/source/getting-started/collecting-data.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/getting-started/collecting-data.rst b/docs/source/getting-started/collecting-data.rst index cb21922047..7c75097fd5 100644 --- a/docs/source/getting-started/collecting-data.rst +++ 
b/docs/source/getting-started/collecting-data.rst @@ -97,7 +97,10 @@ The celery monitor is responsible for generating the tasks that will tell the ot - ``refresh_materialized_views_interval_in_days``, number of days to wait between refreshes of materialized views. If you choose, you can also adjust the values in the ``Tasks`` block if you would like to control when tasks should be re-run on a given repository. -This is specified as a number of days since the last successful run. + +- ``collection_interval``, the interval (in seconds) at which the collection monitor task runs to schedule new collection jobs. This is different from the other interval values which use days. + +- ``core_collection_interval_days``, ``secondary_collection_interval_days``, ``facade_collection_interval_days``, and ``ml_collection_interval_days``, which specify the number of days since the last successful run before a task should be re-run on a given repository. Adding repos for collection ----------------------------- From a9694236473f804b76323cf3dd3f5f7e740e9905 Mon Sep 17 00:00:00 2001 From: Adeeba Nizam Date: Mon, 10 Nov 2025 02:40:48 +0530 Subject: [PATCH 020/104] docs: move contributor lists to CONTRIBUTORS.md and update README for clarity Signed-off-by: Adeeba Nizam --- CONTRIBUTORS.md | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 66 ++----------------------------------- 2 files changed, 89 insertions(+), 64 deletions(-) create mode 100644 CONTRIBUTORS.md diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md new file mode 100644 index 0000000000..a898578fd0 --- /dev/null +++ b/CONTRIBUTORS.md @@ -0,0 +1,87 @@ +# Contributors & Participants + +This file contains full attribution lists for: +- Current maintainers +- Founding Maintainers +- Former maintainers +- Contributors +- Google Summer of Code participants (by year) + +--- +## Current Maintainers +- Sean P. Goggins — [@sgoggins](https://github.com/sgoggins) +- Adrian Edwards — [@MoralCode](https://github.com/MoralCode) +- Andrew Brain — [@ABrain7710](https://github.com/ABrain7710) +- Isaac Milarsky — [@IsaacMilarky](https://github.com/IsaacMilarky) +- John McGinnis — [@Ulincys](https://github.com/Ulincsys) + +--- + +## Founding Maintainers +- Derek Howard — [@howderek](https://github.com/howderek) + +## Former Maintainers +- Carter Landis — [@ccarterlandis](https://github.com/ccarterlandis) +- Gabe Heim — [@gabe-heim](https://github.com/gabe-heim) +- Matt Snell — [@Nebrethar](https://github.com/Nebrethar) +- Christian Cmehil-Warn — [@christiancme](https://github.com/christiancme) +- Jonah Zukosky — [@jonahz5222](https://github.com/jonahz5222) +- Carolyn Perniciaro — [@CMPerniciaro](https://github.com/CMPerniciaro) +- Elita Nelson — [@ElitaNelson](https://github.com/ElitaNelson) +- Michael Woodruff — [@michaelwoodruffdev](https://github.com/michaelwoodruffdev/) +- Max Balk — [@maxbalk](https://github.com/maxbalk/) + +--- + +## Contributors +- [Dawn Foster](https://github.com/geekygirldawn) +- [Ivana Atanasova](https://github.com/ivanayov) +- [Georg J.P. Link](https://github.com/GeorgLink) +- [Gary P. 
White](https://github.com/garypwhite) + +--- + +## GSoC 2025 Participants +- [Akshat Baranwal](https://github.com/akshatb2006) +- [Asish Kumar](https://github.com/officialasishkumar) +- [Jiahong Lin](https://github.com/xiaoha-cloud) + +--- + +## GSoC 2022 Participants +- [Kaxada](https://github.com/kaxada) +- [Mabel F](https://github.com/mabelbot) +- [Priya Srivastava](https://github.com/Priya730) +- [Ramya Kappagantu](https://github.com/RamyaKappagantu) +- [Yash Prakash](https://gist.github.com/yash-yp) + +--- + +## GSoC 2021 Participants +- [Dhruv Sachdev](https://github.com/Dhruv-Sachdev1313) +- [Rashmi K A](https://github.com/Rashmi-K-A) +- [Yash Prakash](https://gist.github.com/yash-yp) +- [Anuj Lamoria](https://github.com/anujlamoria) +- [Yeming Gu](https://github.com/gymgym1212) +- [Ritik Malik](https://gist.github.com/ritik-malik) + +--- + +## GSoC 2020 Participants +- [Akshara P](https://github.com/aksh555) +- [Tianyi Zhou](https://github.com/tianyichow) +- [Pratik Mishra](https://github.com/pratikmishra356) +- [Sarit Adhikari](https://github.com/sarit-adh) +- [Saicharan Reddy](https://github.com/mrsaicharan1) +- [Abhinav Bajpai](https://github.com/abhinavbajpai2012) + +--- + +## GSoC 2019 Participants +- [Bingwen Ma](https://github.com/bing0n3) +- [Parth Sharma](https://github.com/parthsharma2) + +--- + +## GSoC 2018 Participants +- [Keanu Nichols](https://github.com/kmn5409) diff --git a/README.md b/README.md index bac449c3d8..fef4c26622 100644 --- a/README.md +++ b/README.md @@ -90,68 +90,6 @@ Augur is free software: you can redistribute it and/or modify it under the terms This work has been funded through the Alfred P. Sloan Foundation, Mozilla, The Reynolds Journalism Institute, contributions from VMWare, Red Hat Software, Grace Hopper's Open Source Day, GitHub, Microsoft, Twitter, Adobe, the Gluster Project, Open Source Summit (NA/Europe), and the Linux Foundation Compliance Summit. Significant design contributors include Kate Stewart, Dawn Foster, Duane O'Brien, Remy Decausemaker, others omitted due to the memory limitations of project maintainers, and 15 Google Summer of Code Students. +## Maintainers & Contributors -Current maintainers --------------------- -- `Derek Howard `_ -- `Andrew Brain `_ -- `Isaac Milarsky `_ -- `John McGinnis `_ -- `Sean P. Goggins `_ - -Former maintainers --------------------- -- `Carter Landis `_ -- `Gabe Heim `_ -- `Matt Snell `_ -- `Christian Cmehil-Warn `_ -- `Jonah Zukosky `_ -- `Carolyn Perniciaro `_ -- `Elita Nelson `_ -- `Michael Woodruff `_ -- `Max Balk `_ - -Contributors --------------------- -- `Dawn Foster `_ -- `Ivana Atanasova `_ -- `Georg J.P. Link `_ -- `Gary P White `_ - -GSoC 2025 Participants ------------------------ - -GSoC 2022 participants ------------------------ -- `Kaxada `_ -- `Mabel F `_ -- `Priya Srivastava `_ -- `Ramya Kappagantu `_ -- `Yash Prakash `_ - -GSoC 2021 participants ------------------------ -- `Dhruv Sachdev `_ -- `Rashmi K A `_ -- `Yash Prakash `_ -- `Anuj Lamoria `_ -- `Yeming Gu `_ -- `Ritik Malik `_ - -GSoC 2020 participants ------------------------ -- `Akshara P `_ -- `Tianyi Zhou `_ -- `Pratik Mishra `_ -- `Sarit Adhikari `_ -- `Saicharan Reddy `_ -- `Abhinav Bajpai `_ - -GSoC 2019 participants ------------------------ -- `Bingwen Ma `_ -- `Parth Sharma `_ - -GSoC 2018 participants ------------------------ -- `Keanu Nichols `_ +Refer to [CONTRIBUTORS.md](./CONTRIBUTORS.md) for detailed information about project maintainers, contributors, and GSoC participants. 
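The interval documentation added in PATCH 019/104 above describes plain configuration keys, so a small illustrative sketch of a ``Tasks`` block may help. The key names are taken from that documentation change; the numeric values are placeholders rather than Augur's shipped defaults, and grouping the day-based keys under ``Tasks`` is an assumption:

    "Tasks": {
        "collection_interval": 60,
        "core_collection_interval_days": 1,
        "secondary_collection_interval_days": 1,
        "facade_collection_interval_days": 7,
        "ml_collection_interval_days": 7
    }

Read this as: the collection monitor wakes every 60 seconds to schedule new jobs, while each repository is only re-collected once the relevant number of days has passed since its last successful run.
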
From 95b2b7835298b9613f1a83d35b7720925dea1733 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 11:40:10 -0500 Subject: [PATCH 021/104] connect up the url in another place to prevent errors about a missing config file Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index b6be9dee05..6949acd16f 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -5,6 +5,8 @@ from augur.application.db.models.base import Base from augur.application.db.engine import get_database_string from sqlalchemy import create_engine, event +import os + # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -26,6 +28,9 @@ # my_important_option = config.get_main_option("my_important_option") # ... etc. +# possibly swap sqlalchemy.url with AUGUR_DB env var too + +sqlalchemy_url = os.getenv("AUGUR_DB") or config.get_main_option("sqlalchemy.url") def run_migrations_offline(): """Run migrations in 'offline' mode. @@ -39,9 +44,8 @@ def run_migrations_offline(): script output. """ - url = config.get_main_option("sqlalchemy.url") context.configure( - url=url, + url=sqlalchemy_url, target_metadata=target_metadata, literal_binds=True, dialect_opts={"paramstyle": "named"}, @@ -58,7 +62,7 @@ def run_migrations_online(): and associate a connection with the context. """ - url = get_database_string() + url = sqlalchemy_url engine = create_engine(url) @event.listens_for(engine, "connect", insert=True) From d78f5dceb4f35218e9d995b7d24327b703addd8f Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 11:40:19 -0500 Subject: [PATCH 022/104] install python-dotenv Signed-off-by: Adrian Edwards --- pyproject.toml | 1 + uv.lock | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 801ac54574..264aff98f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ dev = [ { include-group = "test" }, { include-group = "debug" }, { include-group = "docs" }, + "python-dotenv>=1.2.1", ] lint = [ "pylint", diff --git a/uv.lock b/uv.lock index 37df99ba32..ca3f330ff8 100644 --- a/uv.lock +++ b/uv.lock @@ -224,6 +224,7 @@ dev = [ { name = "mypy" }, { name = "pylint" }, { name = "pytest" }, + { name = "python-dotenv" }, { name = "setuptools" }, { name = "sphinx" }, { name = "sphinx-rtd-theme" }, @@ -346,6 +347,7 @@ dev = [ { name = "mypy", specifier = ">=1.18.2" }, { name = "pylint" }, { name = "pytest" }, + { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "setuptools" }, { name = "sphinx", specifier = "==7.2.6" }, { name = "sphinx-rtd-theme", specifier = "==2.0.0" }, @@ -2782,6 +2784,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = "python-dotenv" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = 
"2025-10-26T15:12:10.434Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, +] + [[package]] name = "python-http-client" version = "3.3.7" From 8f7368f625f811f8d6411de4edb5ede8b6343ffe Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 12:09:35 -0500 Subject: [PATCH 023/104] load from .env Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 6949acd16f..664f96afd2 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -5,8 +5,10 @@ from augur.application.db.models.base import Base from augur.application.db.engine import get_database_string from sqlalchemy import create_engine, event +from dotenv import load_dotenv import os +load_dotenv() # this is the Alembic Config object, which provides # access to the values within the .ini file in use. From 972303a4aa1c8c95f709d1ed665cf388a017184f Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 12:10:21 -0500 Subject: [PATCH 024/104] remove date from migration filename format Signed-off-by: Adrian Edwards --- alembic.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alembic.ini b/alembic.ini index c8976b991b..c36965bb80 100644 --- a/alembic.ini +++ b/alembic.ini @@ -5,7 +5,7 @@ script_location = augur/application/schema/alembic # template used to generate migration files -file_template = %%(year)d-%%(month).2d-%%(day).2d_%%(rev)s_%%(slug)s +file_template = %%(rev)s_%%(slug)s # sys.path path, will be prepended to sys.path if present. # defaults to the current working directory. 
From 0e54842c242dcb92ae9c952be21f467016ccd6ba Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 13:21:26 -0500 Subject: [PATCH 025/104] set up alembic to automatically determine the next version number Generated-by: gpt-5 via cursor Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 664f96afd2..43b001ddaf 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -7,6 +7,8 @@ from sqlalchemy import create_engine, event from dotenv import load_dotenv import os +import re +from pathlib import Path load_dotenv() @@ -34,6 +36,34 @@ sqlalchemy_url = os.getenv("AUGUR_DB") or config.get_main_option("sqlalchemy.url") + +VERSIONS_DIR = Path(__file__).parent / "versions" + +def _next_int_rev() -> str: + max_rev = 0 + for p in VERSIONS_DIR.glob("*.py"): + try: + txt = p.read_text(encoding="utf-8") + except Exception: + continue + m = re.search(r"^revision\s*=\s*['\"]([^'\"]+)['\"]", txt, re.M) + if m and m.group(1).isdigit(): + max_rev = max(max_rev, int(m.group(1))) + return str(max_rev + 1) + +def process_revision_directives(context, revision, directives): + if not directives: + return + script = directives[0] + # If user passed --rev-id, honor it; otherwise override Alembic's default + opts = getattr(context.config, "cmd_opts", None) + user_rev_id = getattr(opts, "rev_id", None) + if user_rev_id: + script.rev_id = str(user_rev_id) + else: + script.rev_id = _next_int_rev() + + def run_migrations_offline(): """Run migrations in 'offline' mode. @@ -51,6 +81,7 @@ def run_migrations_offline(): target_metadata=target_metadata, literal_binds=True, dialect_opts={"paramstyle": "named"}, + process_revision_directives=process_revision_directives, ) with context.begin_transaction(): @@ -84,6 +115,7 @@ def set_search_path(dbapi_connection, connection_record): version_table_schema=target_metadata.schema, include_schemas=True, compare_type=True, + process_revision_directives=process_revision_directives, ) with context.begin_transaction(): From 28fb8397dd5159a76805e22d3e4e8197b9d3e3c8 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 13:29:09 -0500 Subject: [PATCH 026/104] replace file contents-based revision check with one that just looks at the filenames Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 43b001ddaf..95f5bd4270 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -42,11 +42,8 @@ def _next_int_rev() -> str: max_rev = 0 for p in VERSIONS_DIR.glob("*.py"): - try: - txt = p.read_text(encoding="utf-8") - except Exception: - continue - m = re.search(r"^revision\s*=\s*['\"]([^'\"]+)['\"]", txt, re.M) + pathname = Path(p).name + m = re.search(r"^([\d]+)_[a-zA-Z0-9_]+.py", pathname, re.M) if m and m.group(1).isdigit(): max_rev = max(max_rev, int(m.group(1))) return str(max_rev + 1) From e286f8f535e958cefc05e4bbcf7f7295c0e5f6c2 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 14:11:03 -0500 Subject: [PATCH 027/104] =?UTF-8?q?remove=20search=20paths=20"It=20can=20m?= =?UTF-8?q?ake=20reflection=20=E2=80=9Close=E2=80=9D=20schema=20names.=20R?= 
=?UTF-8?q?emove=20the=20connect=20listener=20that=20sets=20search=5Fpath?= =?UTF-8?q?=20while=20generating=20migrations."=20-=20gpt5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assisted-by: GPT5 via cursor Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 95f5bd4270..f30febbb95 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -95,14 +95,6 @@ def run_migrations_online(): url = sqlalchemy_url engine = create_engine(url) - @event.listens_for(engine, "connect", insert=True) - def set_search_path(dbapi_connection, connection_record): - existing_autocommit = dbapi_connection.autocommit - dbapi_connection.autocommit = True - cursor = dbapi_connection.cursor() - cursor.execute("SET SESSION search_path=public,augur_data,augur_operations,spdx") - cursor.close() - dbapi_connection.autocommit = existing_autocommit with engine.connect() as connection: From ec7793da5e683395b732e58a7def4547176c05fe Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 14:53:32 -0500 Subject: [PATCH 028/104] update alembic Signed-off-by: Adrian Edwards --- pyproject.toml | 4 ++-- uv.lock | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 264aff98f4..ffb17b99d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ classifiers = [ "Programming Language :: Python :: 3.10" ] dependencies = [ - "alembic==1.8.1", + "alembic>=1.17.1", "Beaker==1.11.0", "blinker==1.4", "bokeh==2.0.2", @@ -86,7 +86,7 @@ dependencies = [ "toml", "toolz>=0.8.2", "tornado==6.4.1", - "typing-extensions==4.7.1", + "typing-extensions>=4.7", "Werkzeug~=2.0.0", "xgboost==3.0.2", "xlrd==2.0.1", diff --git a/uv.lock b/uv.lock index ca3f330ff8..84bcaef4e6 100644 --- a/uv.lock +++ b/uv.lock @@ -30,15 +30,17 @@ wheels = [ [[package]] name = "alembic" -version = "1.8.1" +version = "1.17.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mako" }, { name = "sqlalchemy" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/37/ab/80e6d86ca81235ea1a7104089dddf74de4b45f8af0a05d4b265be44d6ff9/alembic-1.8.1.tar.gz", hash = "sha256:cd0b5e45b14b706426b833f06369b9a6d5ee03f826ec3238723ce8caaf6e5ffa", size = 1255927, upload-time = "2022-07-13T14:18:50.766Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6e/b6/2a81d7724c0c124edc5ec7a167e85858b6fd31b9611c6fb8ecf617b7e2d3/alembic-1.17.1.tar.gz", hash = "sha256:8a289f6778262df31571d29cca4c7fbacd2f0f582ea0816f4c399b6da7528486", size = 1981285, upload-time = "2025-10-29T00:23:16.667Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/c8/69600a8138a56794713ecdb8b75b14fbe32a410bc444683f27dbab93c0ca/alembic-1.8.1-py3-none-any.whl", hash = "sha256:0a024d7f2de88d738d7395ff866997314c837be6104e90c5724350313dee4da4", size = 209845, upload-time = "2022-07-13T14:18:53.415Z" }, + { url = "https://files.pythonhosted.org/packages/a5/32/7df1d81ec2e50fb661944a35183d87e62d3f6c6d9f8aff64a4f245226d55/alembic-1.17.1-py3-none-any.whl", hash = "sha256:cbc2386e60f89608bb63f30d2d6cc66c7aaed1fe105bd862828600e5ad167023", size = 247848, upload-time = "2025-10-29T00:23:18.79Z" }, ] [[package]] @@ -265,7 +267,7 @@ test = [ 
[package.metadata] requires-dist = [ - { name = "alembic", specifier = "==1.8.1" }, + { name = "alembic", specifier = ">=1.17.1" }, { name = "beaker", specifier = "==1.11.0" }, { name = "blinker", specifier = "==1.4" }, { name = "bokeh", specifier = "==2.0.2" }, @@ -332,7 +334,7 @@ requires-dist = [ { name = "toml" }, { name = "toolz", specifier = ">=0.8.2" }, { name = "tornado", specifier = "==6.4.1" }, - { name = "typing-extensions", specifier = "==4.7.1" }, + { name = "typing-extensions", specifier = ">=4.7" }, { name = "werkzeug", specifier = "~=2.0.0" }, { name = "xgboost", specifier = "==3.0.2" }, { name = "xlrd", specifier = "==2.0.1" }, @@ -4076,11 +4078,11 @@ wheels = [ [[package]] name = "typing-extensions" -version = "4.7.1" +version = "4.15.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3c/8b/0111dd7d6c1478bf83baa1cab85c686426c7a6274119aceb2bd9d35395ad/typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2", size = 72876, upload-time = "2023-07-02T14:20:55.045Z" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/6b/63cc3df74987c36fe26157ee12e09e8f9db4de771e0f3404263117e75b95/typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36", size = 33232, upload-time = "2023-07-02T14:20:53.275Z" }, + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] [[package]] From 7b18880c5ffd927260ee60fbf5ac014acd58cfd2 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 14:55:06 -0500 Subject: [PATCH 029/104] include schemas/be schema-aware in offline version of migrations too Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index f30febbb95..2b6fa91f8d 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -79,6 +79,7 @@ def run_migrations_offline(): literal_binds=True, dialect_opts={"paramstyle": "named"}, process_revision_directives=process_revision_directives, + include_schemas=True, ) with context.begin_transaction(): From 0f7da8e3372576ee68a8008466659b8e2b5537fb Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 15:35:21 -0500 Subject: [PATCH 030/104] use the public schema by default for the version table schema. 
Trying to explicitly set it causes the version table to get dropped as part of the next generated automatic migration Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 1 - 1 file changed, 1 deletion(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 2b6fa91f8d..827492f7a2 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -102,7 +102,6 @@ def run_migrations_online(): context.configure( connection=connection, target_metadata=target_metadata, - version_table_schema=target_metadata.schema, include_schemas=True, compare_type=True, process_revision_directives=process_revision_directives, From 31bd7f447c51a2627eb313e8856e69ebdcf07a3e Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 18 Nov 2025 14:56:53 -0500 Subject: [PATCH 031/104] remove unused imports per reviewdog Signed-off-by: Adrian Edwards --- augur/application/schema/alembic/env.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 827492f7a2..bf2993c4b1 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -3,8 +3,7 @@ from alembic import context from augur.application.db.models.base import Base -from augur.application.db.engine import get_database_string -from sqlalchemy import create_engine, event +from sqlalchemy import create_engine from dotenv import load_dotenv import os import re From 361dbf854da15a726f0587449a30b6143360b037 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 18 Nov 2025 15:32:40 -0500 Subject: [PATCH 032/104] python-dotenv is not just a dev dependency Signed-off-by: Adrian Edwards --- pyproject.toml | 2 +- uv.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ffb17b99d8..908558f239 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ dependencies = [ "psycopg2-binary==2.9.9", "pylint==2.15.5", "python-crfsuite>=0.9.8", + "python-dotenv>=1.2.1", "pyYaml", "redis==4.3.3", "requests==2.32.0", @@ -99,7 +100,6 @@ dev = [ { include-group = "test" }, { include-group = "debug" }, { include-group = "docs" }, - "python-dotenv>=1.2.1", ] lint = [ "pylint", diff --git a/uv.lock b/uv.lock index 84bcaef4e6..819bc2be00 100644 --- a/uv.lock +++ b/uv.lock @@ -187,6 +187,7 @@ dependencies = [ { name = "psycopg2-binary" }, { name = "pylint" }, { name = "python-crfsuite" }, + { name = "python-dotenv" }, { name = "pyyaml" }, { name = "redis" }, { name = "requests" }, @@ -226,7 +227,6 @@ dev = [ { name = "mypy" }, { name = "pylint" }, { name = "pytest" }, - { name = "python-dotenv" }, { name = "setuptools" }, { name = "sphinx" }, { name = "sphinx-rtd-theme" }, @@ -312,6 +312,7 @@ requires-dist = [ { name = "psycopg2-binary", specifier = "==2.9.9" }, { name = "pylint", specifier = "==2.15.5" }, { name = "python-crfsuite", specifier = ">=0.9.8" }, + { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "pyyaml" }, { name = "redis", specifier = "==4.3.3" }, { name = "requests", specifier = "==2.32.0" }, @@ -349,7 +350,6 @@ dev = [ { name = "mypy", specifier = ">=1.18.2" }, { name = "pylint" }, { name = "pytest" }, - { name = "python-dotenv", specifier = ">=1.2.1" }, { name = "setuptools" }, { name = "sphinx", specifier = "==7.2.6" }, { name = "sphinx-rtd-theme", specifier = "==2.0.0" }, From 21a02b222f4474f3bedbb44acacfc811426a5d07 Mon Sep 17 00:00:00 2001 From: 
Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Tue, 18 Nov 2025 18:41:43 -0500 Subject: [PATCH 033/104] Update John's name Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- CONTRIBUTORS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index a898578fd0..8599944b00 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -13,7 +13,7 @@ This file contains full attribution lists for: - Adrian Edwards — [@MoralCode](https://github.com/MoralCode) - Andrew Brain — [@ABrain7710](https://github.com/ABrain7710) - Isaac Milarsky — [@IsaacMilarky](https://github.com/IsaacMilarky) -- John McGinnis — [@Ulincys](https://github.com/Ulincsys) +- John McGinness — [@Ulincys](https://github.com/Ulincsys) --- @@ -85,3 +85,4 @@ This file contains full attribution lists for: ## GSoC 2018 Participants - [Keanu Nichols](https://github.com/kmn5409) + From f33054b13f8f0d46cae2e9dc07e644a836a9dea8 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Sun, 16 Nov 2025 21:52:20 -0500 Subject: [PATCH 034/104] refactor DEI and user CLI functions to use context managers for database sessions and improve error handling Signed-off-by: Shlok Gilda --- augur/api/routes/dei.py | 155 ++++++++++++++++++---------------- augur/application/cli/user.py | 51 +++++------ 2 files changed, 108 insertions(+), 98 deletions(-) diff --git a/augur/api/routes/dei.py b/augur/api/routes/dei.py index 646081ba2c..44fe014615 100644 --- a/augur/api/routes/dei.py +++ b/augur/api/routes/dei.py @@ -12,7 +12,7 @@ from augur.application.db.models import ClientApplication, CollectionStatus, Repo, RepoGroup, BadgingDEI from augur.application.db.session import DatabaseSession -from augur.tasks.util.collection_util import CollectionRequest,AugurTaskRoutine, get_enabled_phase_names_from_config, core_task_success_util +from augur.tasks.util.collection_util import CollectionRequest,AugurTaskRoutine, get_enabled_phase_names_from_config_session, core_task_success_util from augur.tasks.start_tasks import prelim_phase, primary_repo_collect_phase from augur.tasks.github.util.util import get_repo_weight_by_issue @@ -33,70 +33,71 @@ def dei_track_repo(application: ClientApplication): if not (dei_id and level and repo_url): return jsonify({"status": "Missing argument"}), 400 - + repo_url = repo_url.lower() - - session = DatabaseSession(logger, engine=current_app.engine) - session.autocommit = True - repo: Repo = session.query(Repo).filter(Repo.repo_git==repo_url).first() - if repo: - # Making the assumption that only new repos will be added with this endpoint - return jsonify({"status": "Repo already exists"}) - - frontend_repo_group: RepoGroup = session.query(RepoGroup).filter(RepoGroup.rg_name == FRONTEND_REPO_GROUP_NAME).first() - repo_id = Repo.insert_github_repo(session, repo_url, frontend_repo_group.repo_group_id, "API.DEI", repo_type="") - if not repo_id: - return jsonify({"status": "Error adding repo"}) - - repo = Repo.get_by_id(session, repo_id) - repo_git = repo.repo_git - pr_issue_count = get_repo_weight_by_issue(logger, repo_git) - - record = { - "repo_id": repo_id, - "issue_pr_sum": pr_issue_count, - "core_weight": -9223372036854775808, - "secondary_weight": -9223372036854775808, - "ml_weight": -9223372036854775808 - } - - collection_status_unique = ["repo_id"] - session.insert_data(record, CollectionStatus, collection_status_unique, on_conflict_update=False) - - record = { - "badging_id": dei_id, - "level": level, - "repo_id": repo_id - } - - 
enabled_phase_names = get_enabled_phase_names_from_config_session(session, logger) - - #Primary collection hook. - primary_enabled_phases = [] - - #Primary jobs - if prelim_phase.__name__ in enabled_phase_names: - primary_enabled_phases.append(prelim_phase) - - primary_enabled_phases.append(primary_repo_collect_phase) - - #task success is scheduled no matter what the config says. - def core_task_success_util_gen(repo_git): - return core_task_success_util.si(repo_git) - - primary_enabled_phases.append(core_task_success_util_gen) - - record = BadgingDEI(**record) - session.add(record) - - deiHook = CollectionRequest("core",primary_enabled_phases) - deiHook.repo_list = [repo_url] - - singleRoutine = AugurTaskRoutine(logger, session,[deiHook]) - singleRoutine.start_data_collection() - #start_block_of_repos(logger, session, [repo_url], primary_enabled_phases, "new") - - session.close() + + # Use context manager to ensure proper session cleanup + with DatabaseSession(logger, engine=current_app.engine) as session: + repo: Repo = session.query(Repo).filter(Repo.repo_git==repo_url).first() + if repo: + # Making the assumption that only new repos will be added with this endpoint + return jsonify({"status": "Repo already exists"}) + + frontend_repo_group: RepoGroup = session.query(RepoGroup).filter(RepoGroup.rg_name == FRONTEND_REPO_GROUP_NAME).first() + repo_id = Repo.insert_github_repo(session, repo_url, frontend_repo_group.repo_group_id, "API.DEI", repo_type="") + if not repo_id: + return jsonify({"status": "Error adding repo"}) + + repo = Repo.get_by_id(session, repo_id) + repo_git = repo.repo_git + pr_issue_count = get_repo_weight_by_issue(logger, repo_git) + + record = { + "repo_id": repo_id, + "issue_pr_sum": pr_issue_count, + "core_weight": -9223372036854775808, + "secondary_weight": -9223372036854775808, + "ml_weight": -9223372036854775808 + } + + collection_status_unique = ["repo_id"] + session.insert_data(record, CollectionStatus, collection_status_unique, on_conflict_update=False) + + record = { + "badging_id": dei_id, + "level": level, + "repo_id": repo_id + } + + enabled_phase_names = get_enabled_phase_names_from_config_session(session, logger) + + # Primary collection hook. + primary_enabled_phases = [] + + # Primary jobs + if prelim_phase.__name__ in enabled_phase_names: + primary_enabled_phases.append(prelim_phase) + + primary_enabled_phases.append(primary_repo_collect_phase) + + #task success is scheduled no matter what the config says. 
+ def core_task_success_util_gen(repo_git): + return core_task_success_util.si(repo_git) + + primary_enabled_phases.append(core_task_success_util_gen) + + record = BadgingDEI(**record) + session.add(record) + + # Explicitly commit the session to persist BadgingDEI record + session.commit() + + deiHook = CollectionRequest("core",primary_enabled_phases) + deiHook.repo_list = [repo_url] + + singleRoutine = AugurTaskRoutine(logger, session,[deiHook]) + singleRoutine.start_data_collection() + #start_block_of_repos(logger, session, [repo_url], primary_enabled_phases, "new") return jsonify({"status": "Success"}) @@ -108,25 +109,31 @@ def dei_report(application: ClientApplication): if not dei_id: return jsonify({"status": "Missing argument"}), 400 - - session = DatabaseSession(logger, engine=current_app.engine) - project: BadgingDEI = session.query(BadgingDEI).filter(BadgingDEI.badging_id==dei_id).first() + # Use context manager but scope it carefully to cover lazy-loading + with DatabaseSession(logger, engine=current_app.engine) as session: + project: BadgingDEI = session.query(BadgingDEI).filter(BadgingDEI.badging_id==dei_id).first() + + if not project: + return jsonify({"status": "Invalid ID"}) + + # Render template while session is still open (accesses project.repo via lazy-loading) + md = render_template("dei-badging-report.j2", project=project) - if not project: - return jsonify({"status": "Invalid ID"}) - - md = render_template("dei-badging-report.j2", project=project) + # Store project.id before session closes + project_id = project.id + + # Session is now closed - proceed with file operations (no database access needed) cachePath = Path.cwd() / "augur" / "static" / "cache" - source = cachePath / f"{project.id}_badging_report.md" - report = cachePath / f"{project.id}_badging_report.pdf" + source = cachePath / f"{project_id}_badging_report.md" + report = cachePath / f"{project_id}_badging_report.pdf" source.write_text(md) command = f"mdpdf -o {str(report.resolve())} {str(source.resolve())}" converter = subprocess.Popen(command.split()) converter.wait() - + # TODO what goes in the report? 
return send_file(report.resolve()) \ No newline at end of file diff --git a/augur/application/cli/user.py b/augur/application/cli/user.py index 2cae5d7b22..3787708252 100644 --- a/augur/application/cli/user.py +++ b/augur/application/cli/user.py @@ -38,41 +38,44 @@ def add_user(username, email, firstname, lastname, admin, phone_number, password """Add a new user to the database with email address = EMAIL.""" session = Session() + try: + if session.query(User).filter(User.login_name == username).first() is not None: + return click.echo("username already taken") - if session.query(User).filter(User.login_name == username).first() is not None: - return click.echo("username already taken") + if session.query(User).filter(User.email == email).first() is not None: + return click.echo("email already signed-up") - if session.query(User).filter(User.email == email).first() is not None: - return click.echo("email already signed-up") - - user = session.query(User).filter(User.login_name == username).first() - if not user: - password = User.compute_hashsed_password(password) - new_user = User(login_name=username, login_hashword=password, email=email, text_phone=phone_number, first_name=firstname, last_name=lastname, admin=admin, tool_source="User CLI", tool_version=None, data_source="CLI") - session.add(new_user) - session.commit() - user_type = "admin user" if admin else "user" - message = f"Successfully added new: {username}" - click.secho(message, bold=True) + user = session.query(User).filter(User.login_name == username).first() + if not user: + password = User.compute_hashsed_password(password) + new_user = User(login_name=username, login_hashword=password, email=email, text_phone=phone_number, first_name=firstname, last_name=lastname, admin=admin, tool_source="User CLI", tool_version=None, data_source="CLI") + session.add(new_user) + session.commit() + user_type = "admin user" if admin else "user" + message = f"Successfully added new: {username}" + click.secho(message, bold=True) + return 0 + finally: session.close() engine.dispose() - - return 0 @cli.command('password_reset', short_help="Reset a user's password") @click.argument("username") @click.password_option(help="New password") def reset_password(username, password): session = Session() + try: + user = session.query(User).filter(User.login_name == username).first() - user = session.query(User).filter(User.login_name == username).first() + if not user: + return click.echo("invalid username") - if not user: - return click.echo("invalid username") - - password = User.compute_hashsed_password(password) - user.login_hashword = password - session.commit() + password = User.compute_hashsed_password(password) + user.login_hashword = password + session.commit() - return click.echo("Password updated") \ No newline at end of file + return click.echo("Password updated") + finally: + session.close() + engine.dispose() \ No newline at end of file From 7aefd1bef36fbf41775229969065674f92aa3e26 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 19 Nov 2025 12:03:55 -0500 Subject: [PATCH 035/104] Fix deadlock issues by implementing timeout handling for git operations Signed-off-by: Shlok Gilda --- .../facade_worker/facade_worker/repofetch.py | 204 ++++++++++++++---- .../facade_worker/utilitymethods.py | 21 +- 2 files changed, 178 insertions(+), 47 deletions(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py index f754f4e098..18854c00bd 100644 --- 
a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -149,7 +149,18 @@ def git_repo_initialize(facade_helper, session, repo_git): facade_helper.log_activity('Verbose', f"Cloning: {git}") cmd = f"git -C {repo_path} clone '{git}' {repo_name}" - return_code = subprocess.Popen([cmd], shell=True).wait() + try: + result = subprocess.run( + cmd, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=7200, # 2 hours for large repos + check=False + ) + return_code = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git clone timed out: {cmd}') + return_code = -1 # Timeout error code if (return_code == 0): # If cloning succeeded, repo is ready for analysis @@ -317,8 +328,18 @@ def git_repo_updates(facade_helper, repo_git): firstpull = (f"git -C {absolute_path} pull") - return_code_remote = subprocess.Popen( - [firstpull], shell=True).wait() + try: + result = subprocess.run( + firstpull, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git pull + check=False + ) + return_code_remote = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {firstpull}') + return_code_remote = -1 # Timeout error code facade_helper.log_activity('Verbose', 'Got to here. 1.') @@ -334,13 +355,20 @@ def git_repo_updates(facade_helper, repo_git): getremotedefault = ( f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'") - return_code_remote = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).wait() - - remotedefault = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).communicate()[0] - - remotedefault = remotedefault.decode() + try: + result = subprocess.run( + getremotedefault, shell=True, + capture_output=True, + encoding='utf-8', errors='replace', + timeout=60, # 1 minute for remote query + check=False + ) + return_code_remote = result.returncode + remotedefault = result.stdout.strip() + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') + return_code_remote = -1 + remotedefault = '' facade_helper.log_activity( 'Verbose', f'remote default getting checked out is: {remotedefault}.') @@ -351,14 +379,35 @@ def git_repo_updates(facade_helper, repo_git): facade_helper.log_activity( 'Verbose', f"get remote default command is: \n \n {getremotedefault} \n \n ") - return_code_remote_default_again = subprocess.Popen( - [getremotedefault], shell=True).wait() + try: + result = subprocess.run( + getremotedefault, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git checkout + check=False + ) + return_code_remote_default_again = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') + return_code_remote_default_again = -1 # Timeout error code if return_code_remote_default_again == 0: facade_helper.log_activity('Verbose', "local checkout worked.") cmd = (f"git -C {absolute_path} pull") - return_code = subprocess.Popen([cmd], shell=True).wait() + try: + result = subprocess.run( + cmd, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git pull + check=False + ) + return_code = result.returncode + except subprocess.TimeoutExpired: + 
facade_helper.log_activity('Error', f'Git operation timed out: {cmd}') + return_code = -1 # Timeout error code except Exception as e: facade_helper.log_activity( @@ -369,7 +418,18 @@ def git_repo_updates(facade_helper, repo_git): cmd = (f"git -C {absolute_path} pull") - return_code = subprocess.Popen([cmd], shell=True).wait() + try: + result = subprocess.run( + cmd, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git pull + check=False + ) + return_code = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {cmd}') + return_code = -1 # Timeout error code # If the attempt succeeded, then don't try any further fixes. If # the attempt to fix things failed, give up and try next time. @@ -392,37 +452,58 @@ def git_repo_updates(facade_helper, repo_git): getremotedefault = ( f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'") - return_code_remote = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).wait() - - remotedefault = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).communicate()[0] - - remotedefault = remotedefault.decode() + try: + result = subprocess.run( + getremotedefault, shell=True, + capture_output=True, + encoding='utf-8', errors='replace', + timeout=60, # 1 minute for remote query + check=False + ) + return_code_remote = result.returncode + remotedefault = result.stdout.strip() + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') + return_code_remote = -1 + remotedefault = '' try: getremotedefault = ( f"git -C {absolute_path} checkout {remotedefault}") - return_code_remote_default = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).wait() - - return_message_getremotedefault = subprocess.Popen( - [getremotedefault], stdout=subprocess.PIPE, shell=True).communicate()[0] + try: + result = subprocess.run( + getremotedefault, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git checkout + check=False + ) + return_code_remote_default = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') + return_code_remote_default = -1 # Timeout error code facade_helper.log_activity( - 'Verbose', f'get remote default result: {return_message_getremotedefault}') + 'Verbose', f'get remote default result (return code): {return_code_remote_default}') getcurrentbranch = (f"git -C {absolute_path} branch") - return_code_local = subprocess.Popen( - [getcurrentbranch], stdout=subprocess.PIPE, shell=True).wait() - - localdefault = subprocess.Popen( - [getcurrentbranch], stdout=subprocess.PIPE, shell=True).communicate()[0] - - localdefault = localdefault.decode() + try: + result = subprocess.run( + getcurrentbranch, shell=True, + capture_output=True, + encoding='utf-8', errors='replace', + timeout=60, # 1 minute for branch query + check=False + ) + return_code_local = result.returncode + localdefault = result.stdout + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {getcurrentbranch}') + return_code_local = -1 + localdefault = '' facade_helper.log_activity( 'Verbose', f'remote default is: {remotedefault}, and localdefault is {localdefault}.') @@ -430,20 +511,50 @@ def git_repo_updates(facade_helper, repo_git): cmd_checkout_default = ( 
f"git -C {absolute_path} checkout {remotedefault}") - cmd_checkout_default_wait = subprocess.Popen( - [cmd_checkout_default], shell=True).wait() + try: + result = subprocess.run( + cmd_checkout_default, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git checkout + check=False + ) + cmd_checkout_default_wait = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {cmd_checkout_default}') + cmd_checkout_default_wait = -1 cmdpull2 = (f"git -C {absolute_path} pull") cmd_reset = (f"git -C {absolute_path} reset --hard origin/{remotedefault}") - cmd_reset_wait = subprocess.Popen( - [cmd_reset], shell=True).wait() + try: + result = subprocess.run( + cmd_reset, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=300, # 5 minutes for git reset + check=False + ) + cmd_reset_wait = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {cmd_reset}') + cmd_reset_wait = -1 cmd_clean = (f"git -C {absolute_path} clean -df") - return_code_clean = subprocess.Popen( - [cmd_clean], shell=True).wait() + try: + result = subprocess.run( + cmd_clean, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=300, # 5 minutes for git clean + check=False + ) + return_code_clean = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {cmd_clean}') + return_code_clean = -1 except Exception as e: @@ -453,7 +564,18 @@ def git_repo_updates(facade_helper, repo_git): cmdpull2 = (f"git -C {absolute_path} pull") print(cmdpull2) - return_code = subprocess.Popen([cmdpull2], shell=True).wait() + try: + result = subprocess.run( + cmdpull2, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=600, # 10 minutes for git pull + check=False + ) + return_code = result.returncode + except subprocess.TimeoutExpired: + facade_helper.log_activity('Error', f'Git operation timed out: {cmdpull2}') + return_code = -1 # Timeout error code attempt += 1 diff --git a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py index c06614ac7d..6df720584c 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py @@ -105,13 +105,22 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name): return f"{repo_base_dir}{repo_id}-{repo_path}/{repo_name}" -def get_parent_commits_set(absolute_repo_path): - - parents = subprocess.Popen(["git --git-dir %s log --ignore-missing " - "--pretty=format:'%%H'" % (absolute_repo_path)], - stdout=subprocess.PIPE, shell=True) +def get_parent_commits_set(absolute_repo_path, logger=None): - parent_commits = set(parents.stdout.read().decode("utf-8",errors="ignore").split(os.linesep)) + cmd = "git --git-dir %s log --ignore-missing --pretty=format:'%%H'" % (absolute_repo_path) + try: + result = subprocess.run( + cmd, shell=True, + capture_output=True, + encoding='utf-8', errors='replace', # Handle non-UTF-8 gracefully + timeout=600, # 10 minutes for git log + check=False + ) + parent_commits = set(result.stdout.split(os.linesep)) + except subprocess.TimeoutExpired: + if logger: + logger.error(f"Git log timed out for repo: {absolute_repo_path}") + parent_commits = set() # Return empty set on timeout # If there are no 
commits in the range, we still get a blank entry in # the set. Remove it, as it messes with the calculations From 0d487afd9774aa495b499248fc334b23266fc435 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:05:25 -0500 Subject: [PATCH 036/104] fix: Use list.clear() in facade tasks to reduce memory overhead Signed-off-by: Shlok Gilda --- augur/tasks/git/facade_tasks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index 5baaed20d4..d5ead38d14 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -212,7 +212,7 @@ def facade_fetch_missing_commit_messages(repo_git): if len(to_insert) >= 1000: bulk_insert_dicts(logger,to_insert, CommitMessage, ["repo_id","cmt_hash"]) - to_insert = [] + to_insert.clear() to_insert.append(msg_record) except Exception as e: @@ -313,13 +313,14 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: ) if pendingCommitRecordsToInsert: facade_bulk_insert_commits(logger, pendingCommitRecordsToInsert) - pendingCommitRecordsToInsert = [] + pendingCommitRecordsToInsert.clear() if commit_msg: pendingCommitMessageRecordsToInsert.append(commit_msg) if len(pendingCommitMessageRecordsToInsert) >= 1000: bulk_insert_dicts(logger, pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id", "cmt_hash"]) + pendingCommitMessageRecordsToInsert.clear() # FINAL MESSAGE INSERT bulk_insert_dicts(logger, pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id", "cmt_hash"]) From 19d0a9b37c00cff9ca9d89c01c9d3a8f32db39b4 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:05:46 -0500 Subject: [PATCH 037/104] fix: Process facade contributor results in batches Signed-off-by: Shlok Gilda --- augur/tasks/github/facade_github/tasks.py | 33 +++++++++++++++++++---- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index eff64df6ee..3396de7b64 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -252,7 +252,6 @@ def insert_facade_contributors(self, repo_git): #Execute statement with session. 
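Patch 036 above flushes a single buffer every 1000 records and empties it with `.clear()` rather than rebinding a fresh list. A rough sketch of that buffered-flush pattern, with a stand-in `bulk_insert` callable in place of `bulk_insert_dicts`:

from typing import Callable, Iterable

BATCH_SIZE = 1000  # flush threshold used throughout these patches

def buffered_insert(records: Iterable[dict], bulk_insert: Callable[[list], None]) -> int:
    """Accumulate records in one reusable buffer and flush every BATCH_SIZE rows."""
    buffer = []
    flushed = 0
    for record in records:
        buffer.append(record)
        if len(buffer) >= BATCH_SIZE:
            bulk_insert(buffer)
            flushed += len(buffer)
            buffer.clear()   # empty the same list in place instead of rebinding `buffer = []`
    if buffer:               # final partial batch
        bulk_insert(buffer)
        flushed += len(buffer)
    return flushed

# Example: buffered_insert(({"id": i} for i in range(2500)), bulk_insert=lambda rows: None)

Calling `.clear()` keeps one buffer object alive for the whole loop, which is the memory-overhead point the commit message is making.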
     result = execute_sql(new_contrib_sql)
 
-    new_contribs = [dict(row) for row in result.mappings()]
 
     #print(new_contribs)
 
@@ -262,7 +261,20 @@ def insert_facade_contributors(self, repo_git):
 
     key_auth = GithubRandomKeyAuth(logger)
 
-    process_commit_metadata(logger, key_auth, list(new_contribs), repo_id, platform_id)
+    # Process results in batches to reduce memory usage
+    batch = []
+    BATCH_SIZE = 1000
+
+    for row in result.mappings():
+        batch.append(dict(row))
+
+        if len(batch) >= BATCH_SIZE:
+            process_commit_metadata(logger, key_auth, batch, repo_id, platform_id)
+            batch.clear()
+
+    # Process remaining items in batch
+    if batch:
+        process_commit_metadata(logger, key_auth, batch, repo_id, platform_id)
 
     logger.debug("DEBUG: Got through the new_contribs")
 
@@ -300,10 +312,21 @@ def insert_facade_contributors(self, repo_git):
 
     result = execute_sql(resolve_email_to_cntrb_id_sql)
 
-    existing_cntrb_emails = [dict(row) for row in result.mappings()]
 
-    print(existing_cntrb_emails)
-    link_commits_to_contributor(logger, facade_helper,list(existing_cntrb_emails))
+    # Process results in batches to reduce memory usage
+    batch = []
+    BATCH_SIZE = 1000
+
+    for row in result.mappings():
+        batch.append(dict(row))
+
+        if len(batch) >= BATCH_SIZE:
+            link_commits_to_contributor(logger, facade_helper, batch)
+            batch.clear()
+
+    # Process remaining items in batch
+    if batch:
+        link_commits_to_contributor(logger, facade_helper, batch)
 
     return
 
From 05165f10838423b244b99c9c7edb4d1589ab445c Mon Sep 17 00:00:00 2001
From: Shlok Gilda
Date: Thu, 20 Nov 2025 11:06:26 -0500
Subject: [PATCH 038/104] fix: Convert issues collection to generator pattern
 with batching

Signed-off-by: Shlok Gilda
---
 augur/tasks/github/issues.py | 68 ++++++++++++++++++++++++++++--------
 1 file changed, 54 insertions(+), 14 deletions(-)

diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py
index 37bee5c8dd..d100d511bc 100644
--- a/augur/tasks/github/issues.py
+++ b/augur/tasks/github/issues.py
@@ -1,6 +1,6 @@
 import logging
 import traceback
-from datetime import timedelta, timezone
+from datetime import timedelta, timezone, datetime
 
 from sqlalchemy.exc import IntegrityError
 
@@ -20,9 +20,21 @@
 development = get_development_flag()
 
 @celery.task(base=AugurCoreRepoCollectionTask)
-def collect_issues(repo_git : str, full_collection: bool) -> int:
+def collect_issues(repo_git: str, full_collection: bool) -> int:
+    """
+    Collect all issues (excluding pull requests) for a repository.
 
-    logger = logging.getLogger(collect_issues.__name__)
+    Retrieves issues from GitHub API in batches of 1000 and inserts them along with
+    related labels, assignees, and contributors.
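Patch 037 applies the same idea to SQL results: rows are pulled from the cursor and handed off in fixed-size chunks instead of being materialized as one list. A hedged sketch against a generic SQLAlchemy engine; the `engine`, `query_text`, and `process_batch` names are assumptions for illustration, not Augur APIs:

import sqlalchemy as s

def process_rows_in_batches(engine, query_text: str, process_batch, batch_size: int = 1000) -> None:
    """Stream rows from a SQL query and hand them to process_batch in fixed-size chunks."""
    with engine.connect() as connection:
        result = connection.execute(s.text(query_text))
        batch = []
        for row in result.mappings():   # RowMapping objects behave like dicts
            batch.append(dict(row))
            if len(batch) >= batch_size:
                process_batch(batch)
                batch.clear()
        if batch:                       # remaining rows after the loop
            process_batch(batch)

When `process_batch` makes slow external calls, as the GitHub lookups here do, pulling the rows up front (for example with `result.mappings().fetchall()`) releases the cursor sooner; a later patch in this series makes exactly that trade-off.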
+ + Args: + repo_git: Full git URL (e.g., 'https://github.com/chaoss/augur') + full_collection: True for all historical data, False for incremental (last collection - 2 days) + + Returns: + Number of issues collected, or -1 on error + """ + logger = logging.getLogger(collect_issues.__name__) repo_id = get_repo_by_repo_git(repo_git).repo_id @@ -31,33 +43,60 @@ def collect_issues(repo_git : str, full_collection: bool) -> int: if full_collection: core_data_last_collected = None else: - # subtract 2 days to ensure all data is collected + # Subtract 2 days to ensure all data is collected core_data_last_collected = (get_core_data_last_collected(repo_id) - timedelta(days=2)).replace(tzinfo=timezone.utc) key_auth = GithubRandomKeyAuth(logger) logger.info(f'this is the manifest.key_auth value: {str(key_auth)}') - try: - issue_data = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) + try: + issue_data_generator = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) - if not issue_data: - logger.info(f"{owner}/{repo} has no issues") - return 0 + # Process issues in batches to avoid memory spikes + batch = [] + total_issues = 0 + batch_size = 1000 + + for issue in issue_data_generator: + batch.append(issue) - total_issues = len(issue_data) - process_issues(issue_data, f"{owner}/{repo}: Issue task", repo_id, logger) + if len(batch) >= batch_size: + logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues})") + process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) + total_issues += len(batch) + batch.clear() + + # Process remaining issues in the last batch + if len(batch) > 0: + logger.info(f"{owner}/{repo}: Processing final batch of {len(batch)} issues") + process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) + total_issues += len(batch) + + if total_issues == 0: + logger.info(f"{owner}/{repo} has no issues") return total_issues - + except Exception as e: logger.error(f"Could not collect issues for repo {repo_git}\n Reason: {e} \n Traceback: {''.join(traceback.format_exception(None, e, e.__traceback__))}") return -1 -def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: +def retrieve_all_issue_data(repo_git: str, logger:logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): + """ + Retrieve all issue data for a repository as a generator. + + Returns a generator to avoid materializing all issues in memory at once. + This is critical for repos with 10,000+ issues to prevent memory spikes. + Args: + repo_git (str): The GitHub repository in "owner/repo" format. + logger (logging.Logger): Logger for logging messages. + key_auth (GithubRandomKeyAuth): Auth handler for GitHub API. + since (datetime, optional): Only issues updated since this datetime will be retrieved. 
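The key change in this patch is that the paginator is consumed lazily rather than via `list(issues_paginator)`. A small illustration of the difference, using a hypothetical `paginate_resource` generator in place of the GitHub client:

from typing import Iterator

def paginate_resource(total: int) -> Iterator[dict]:
    """Hypothetical stand-in for GithubDataAccess.paginate_resource: yields items lazily."""
    for number in range(total):
        yield {"number": number}

# Eager: builds a 50,000-element list before any processing starts.
eager = list(paginate_resource(50_000))

# Lazy: nothing is produced until the caller iterates; peak memory stays near one batch.
lazy = paginate_resource(50_000)
first_batch = [next(lazy) for _ in range(1000)]

print(len(eager), len(first_batch))  # 50000 1000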
+ """ owner, repo = get_owner_repo(repo_git) logger.info(f"Collecting issues for {owner}/{repo}") @@ -74,7 +113,8 @@ def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: issues_paginator = github_data_access.paginate_resource(url) - return list(issues_paginator) + # Return the generator directly instead of materializing it + return issues_paginator def process_issues(issues, task_name, repo_id, logger) -> None: From 40f9fab2ee1228db3aa65f0191e2c8541def9dd8 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:06:50 -0500 Subject: [PATCH 039/104] fix: Add batch processing to PR commits and files collection Signed-off-by: Shlok Gilda --- .../pull_requests/commits_model/core.py | 14 +++++++--- .../github/pull_requests/files_model/core.py | 14 +++++++--- augur/tasks/github/pull_requests/tasks.py | 27 ++++++------------- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index 2df6d66f5d..83b283bb6d 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -43,13 +43,15 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti logger.info(f"Getting pull request commits for repo: {repo.repo_git}") github_data_access = GithubDataAccess(key_auth, logger) - + + BATCH_SIZE = 1000 + pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] all_data = [] for index,pr_info in enumerate(pr_urls): logger.info(f'{task_name}: Querying commits for pull request #{index + 1} of {len(pr_urls)}') commits_url = pr_info['pr_url'] + '/commits?state=all' - + if not pr_info.get('pr_url'): logger.warning(f"{task_name}: No pr_url found for pull request info: {pr_info}. Skipping.") continue @@ -70,13 +72,17 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti 'repo_id': repo.repo_id, } all_data.append(pr_commit_row) + + if len(all_data) >= BATCH_SIZE: + logger.info(f"{task_name}: Inserting {len(all_data)} rows") + augur_db.insert_data(all_data,PullRequestCommit,pr_commits_natural_keys) + all_data.clear() except UrlNotFoundException: logger.info(f"{task_name}: PR with url of {pr_info['pr_url']} returned 404 on commit data. 
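The loop above issues one commits request per pull request and skips PRs whose URL has since gone missing. A standalone sketch of that guard; `fetch_json` and `NotFoundError` are stand-ins for the worker's data-access helper and `UrlNotFoundException`:

class NotFoundError(Exception):
    """Raised by the hypothetical fetch helper when the API returns 404."""

def fetch_json(url: str) -> list[dict]:
    # Stand-in for the GitHub client; a real implementation would raise
    # NotFoundError on a 404 response. Here it always raises, to exercise the guard.
    raise NotFoundError(url)

def collect_pr_commits(pr_urls: list[dict], logger=print) -> list[dict]:
    rows = []
    for pr_info in pr_urls:
        url = pr_info.get("pr_url")
        if not url:                      # defensive check mirrored from the patch
            logger(f"No pr_url found for {pr_info}. Skipping.")
            continue
        try:
            rows.extend(fetch_json(url + "/commits?state=all"))
        except NotFoundError:            # PR vanished between listing and fetching
            logger(f"PR at {url} returned 404 on commit data. Skipping.")
            continue
    return rows

print(len(collect_pr_commits([{"pr_url": "https://api.github.com/repos/chaoss/augur/pulls/1"}])))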
Skipping.") continue - + if len(all_data) > 0: logger.info(f"{task_name}: Inserting {len(all_data)} rows") - pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] augur_db.insert_data(all_data,PullRequestCommit,pr_commits_natural_keys) diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index cbecb44d6d..60222a3bc1 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -40,12 +40,14 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) + BATCH_SIZE = 1000 + pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] pr_file_rows = [] logger.info(f"Getting pull request files for repo: {repo.repo_git}") for index, pr_info in enumerate(pr_numbers): logger.info(f'Querying files for pull request #{index + 1} of {len(pr_numbers)}') - + query = """ query($repo: String!, $owner: String!,$pr_number: Int!, $numRecords: Int!, $cursor: String) { repository(name: $repo, owner: $owner) { @@ -68,7 +70,7 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection } } """ - + values = ["repository", "pullRequest", "files"] params = { 'owner': owner, @@ -92,6 +94,11 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection } pr_file_rows.append(data) + + if len(pr_file_rows) >= BATCH_SIZE: + logger.info(f"{task_name}: Inserting {len(pr_file_rows)} rows") + augur_db.insert_data(pr_file_rows, PullRequestFile, pr_file_natural_keys) + pr_file_rows.clear() except NotFoundException as e: logger.info(f"{task_name}: PR with number of {pr_info['pr_src_number']} returned 404 on file data. 
Skipping.") continue @@ -101,6 +108,5 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection if len(pr_file_rows) > 0: - # Execute a bulk upsert with sqlalchemy - pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] + logger.info(f"{task_name}: Inserting {len(pr_file_rows)} rows") augur_db.insert_data(pr_file_rows, PullRequestFile, pr_file_natural_keys) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 812a4eef25..88cb5afe21 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -381,6 +381,7 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: logger.debug(f"{owner}/{repo} No pr reviews for repo") return + # Process contributors (all_pr_reviews already in memory, so no OOM risk) contributors = [] for pull_request_id, reviews in all_pr_reviews.items(): @@ -389,32 +390,20 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: if contributor: contributors.append(contributor) - logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") - augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) + logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") + augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) + # Process pr reviews (all_pr_reviews already in memory, so no OOM risk) pr_reviews = [] for pull_request_id, reviews in all_pr_reviews.items(): for review in reviews: - + if "cntrb_id" in review: pr_reviews.append(extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_source, tool_version)) - logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") - pr_review_natural_keys = ["pr_review_src_id",] - augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) - - - - - - - - - - - - - + logger.info(f"{owner}/{repo}: Inserting {len(pr_reviews)} pr reviews") + pr_review_natural_keys = ["pr_review_src_id",] + augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) From a2c1b78a4fda93a265e4f4c7da1b5f95d9da04d2 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 20 Nov 2025 11:42:45 -0600 Subject: [PATCH 040/104] Update augur/tasks/github/issues.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Sean P. Goggins --- augur/tasks/github/issues.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index d100d511bc..68cae4d30c 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -84,7 +84,7 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: -def retrieve_all_issue_data(repo_git: str, logger:logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): +def retrieve_all_issue_data(repo_git: str, logger: logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): """ Retrieve all issue data for a repository as a generator. From 11019b796b9368fe8185da649217bb6644f05a50 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 20 Nov 2025 11:43:41 -0600 Subject: [PATCH 041/104] Update augur/tasks/github/issues.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Sean P. 
Goggins --- augur/tasks/github/issues.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index 68cae4d30c..aaca35ed5f 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -62,7 +62,7 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: batch.append(issue) if len(batch) >= batch_size: - logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues})") + logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues + len(batch)})") process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) total_issues += len(batch) batch.clear() From 6365814fd1dae26819b796f0c466bc9cc46193a0 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 17:05:21 -0500 Subject: [PATCH 042/104] fix: Optimize database cursor usage by fetching results immediately in insert_facade_contributors Signed-off-by: Shlok Gilda --- augur/tasks/github/facade_github/tasks.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 3396de7b64..73fd9a51b5 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -253,6 +253,10 @@ def insert_facade_contributors(self, repo_git): #Execute statement with session. result = execute_sql(new_contrib_sql) + # Fetch all results immediately to close the database cursor/connection + # This prevents holding the connection open during GitHub API calls + rows = result.mappings().fetchall() + #print(new_contribs) #json.loads(pd.read_sql(new_contrib_sql, self.db, params={ @@ -265,7 +269,7 @@ def insert_facade_contributors(self, repo_git): batch = [] BATCH_SIZE = 1000 - for row in result.mappings(): + for row in rows: batch.append(dict(row)) if len(batch) >= BATCH_SIZE: @@ -313,11 +317,15 @@ def insert_facade_contributors(self, repo_git): result = execute_sql(resolve_email_to_cntrb_id_sql) + # Fetch all results immediately to close the database cursor/connection + # This prevents holding the connection open during database UPDATE operations + rows = result.mappings().fetchall() + # Process results in batches to reduce memory usage batch = [] BATCH_SIZE = 1000 - for row in result.mappings(): + for row in rows: batch.append(dict(row)) if len(batch) >= BATCH_SIZE: From f8f06a259b78f0d7396539df5a7cefd4154d6349 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 17:30:56 -0500 Subject: [PATCH 043/104] refactor git command execution to use unified timeout handling across facade operation Signed-off-by: Shlok Gilda --- augur/tasks/git/facade_tasks.py | 4 +- .../facade_worker/facade_worker/config.py | 46 ++++ .../facade_worker/facade_worker/repofetch.py | 240 ++++++------------ .../facade_worker/utilitymethods.py | 28 +- 4 files changed, 140 insertions(+), 178 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index 5baaed20d4..b0d638768f 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -121,7 +121,7 @@ def trim_commits_post_analysis_facade_task(repo_git): repo_loc = (f"{absolute_path}/.git") # Grab the parents of HEAD - parent_commits = get_parent_commits_set(repo_loc) + parent_commits = get_parent_commits_set(repo_loc, facade_helper) # Grab the existing commits from the database existing_commits = get_existing_commits_set(repo_id) @@ -244,7 
+244,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: repo_loc = (f"{absolute_path}/.git") # Grab the parents of HEAD - parent_commits = get_parent_commits_set(repo_loc) + parent_commits = get_parent_commits_set(repo_loc, facade_helper) # Grab the existing commits from the database existing_commits = get_existing_commits_set(repo_id) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index 21fe424d10..6f9cd2cc98 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -29,6 +29,7 @@ import json import logging import random +import subprocess from urllib.parse import urlparse import sqlalchemy as s from sqlalchemy.exc import OperationalError @@ -254,3 +255,48 @@ def insert_or_update_data(self, query, **bind_args)-> None: return def inc_repos_processed(self): self.repos_processed += 1 + + def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False, operation_description: str = None) -> tuple: + """ + Execute a git command with timeout handling. + + This method provides a unified interface for running git commands with + consistent timeout handling and error logging across all facade operations. + + Args: + cmd: The git command to execute + timeout: Timeout in seconds + capture_output: If True, capture stdout/stderr; if False, discard them + operation_description: Human-readable description for error logging + (defaults to cmd if not provided) + + Returns: + tuple: (return_code, stdout_content) + return_code is -1 on timeout + stdout_content is empty string if capture_output=False + """ + if operation_description is None: + operation_description = cmd + + try: + if capture_output: + result = subprocess.run( + cmd, shell=True, + capture_output=True, + encoding='utf-8', errors='replace', + timeout=timeout, + check=False + ) + return result.returncode, result.stdout.strip() + else: + result = subprocess.run( + cmd, shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=timeout, + check=False + ) + return result.returncode, '' + except subprocess.TimeoutExpired: + self.log_activity('Error', f'Git operation timed out: {operation_description}') + return -1, '' diff --git a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py index 18854c00bd..6e911f6fd9 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -149,18 +149,12 @@ def git_repo_initialize(facade_helper, session, repo_git): facade_helper.log_activity('Verbose', f"Cloning: {git}") cmd = f"git -C {repo_path} clone '{git}' {repo_name}" - try: - result = subprocess.run( - cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=7200, # 2 hours for large repos - check=False - ) - return_code = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git clone timed out: {cmd}') - return_code = -1 # Timeout error code + return_code, _ = facade_helper.run_git_command( + cmd, + timeout=7200, # 2 hours for large repos + capture_output=False, + operation_description=f'git clone {git}' + ) if (return_code == 0): # If cloning succeeded, repo is ready for analysis @@ -328,18 +322,12 @@ def git_repo_updates(facade_helper, repo_git): firstpull = (f"git -C {absolute_path} pull") - try: - result = 
subprocess.run( - firstpull, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git pull - check=False - ) - return_code_remote = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {firstpull}') - return_code_remote = -1 # Timeout error code + return_code_remote, _ = facade_helper.run_git_command( + firstpull, + timeout=600, # 10 minutes for git pull + capture_output=False, + operation_description=f'git pull {repo.repo_git}' + ) facade_helper.log_activity('Verbose', 'Got to here. 1.') @@ -355,20 +343,12 @@ def git_repo_updates(facade_helper, repo_git): getremotedefault = ( f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'") - try: - result = subprocess.run( - getremotedefault, shell=True, - capture_output=True, - encoding='utf-8', errors='replace', - timeout=60, # 1 minute for remote query - check=False - ) - return_code_remote = result.returncode - remotedefault = result.stdout.strip() - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') - return_code_remote = -1 - remotedefault = '' + return_code_remote, remotedefault = facade_helper.run_git_command( + getremotedefault, + timeout=60, # 1 minute for remote query + capture_output=True, + operation_description='get remote default branch' + ) facade_helper.log_activity( 'Verbose', f'remote default getting checked out is: {remotedefault}.') @@ -379,35 +359,23 @@ def git_repo_updates(facade_helper, repo_git): facade_helper.log_activity( 'Verbose', f"get remote default command is: \n \n {getremotedefault} \n \n ") - try: - result = subprocess.run( - getremotedefault, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git checkout - check=False - ) - return_code_remote_default_again = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') - return_code_remote_default_again = -1 # Timeout error code + return_code_remote_default_again, _ = facade_helper.run_git_command( + getremotedefault, + timeout=600, # 10 minutes for git checkout + capture_output=False, + operation_description=f'git checkout {remotedefault}' + ) if return_code_remote_default_again == 0: facade_helper.log_activity('Verbose', "local checkout worked.") cmd = (f"git -C {absolute_path} pull") - try: - result = subprocess.run( - cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git pull - check=False - ) - return_code = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmd}') - return_code = -1 # Timeout error code + return_code, _ = facade_helper.run_git_command( + cmd, + timeout=600, # 10 minutes for git pull + capture_output=False, + operation_description=f'git pull {repo.repo_git}' + ) except Exception as e: facade_helper.log_activity( @@ -418,18 +386,12 @@ def git_repo_updates(facade_helper, repo_git): cmd = (f"git -C {absolute_path} pull") - try: - result = subprocess.run( - cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git pull - check=False - ) - return_code = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmd}') - return_code = -1 # Timeout error code + return_code, _ = 
facade_helper.run_git_command( + cmd, + timeout=600, # 10 minutes for git pull + capture_output=False, + operation_description=f'git pull {repo.repo_git}' + ) # If the attempt succeeded, then don't try any further fixes. If # the attempt to fix things failed, give up and try next time. @@ -452,58 +414,36 @@ def git_repo_updates(facade_helper, repo_git): getremotedefault = ( f"git -C {absolute_path} remote show origin | sed -n '/HEAD branch/s/.*: //p'") - try: - result = subprocess.run( - getremotedefault, shell=True, - capture_output=True, - encoding='utf-8', errors='replace', - timeout=60, # 1 minute for remote query - check=False - ) - return_code_remote = result.returncode - remotedefault = result.stdout.strip() - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') - return_code_remote = -1 - remotedefault = '' + return_code_remote, remotedefault = facade_helper.run_git_command( + getremotedefault, + timeout=60, # 1 minute for remote query + capture_output=True, + operation_description='get remote default branch' + ) try: getremotedefault = ( f"git -C {absolute_path} checkout {remotedefault}") - try: - result = subprocess.run( - getremotedefault, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git checkout - check=False - ) - return_code_remote_default = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {getremotedefault}') - return_code_remote_default = -1 # Timeout error code + return_code_remote_default, _ = facade_helper.run_git_command( + getremotedefault, + timeout=600, # 10 minutes for git checkout + capture_output=False, + operation_description=f'git checkout {remotedefault}' + ) facade_helper.log_activity( 'Verbose', f'get remote default result (return code): {return_code_remote_default}') getcurrentbranch = (f"git -C {absolute_path} branch") - try: - result = subprocess.run( - getcurrentbranch, shell=True, - capture_output=True, - encoding='utf-8', errors='replace', - timeout=60, # 1 minute for branch query - check=False - ) - return_code_local = result.returncode - localdefault = result.stdout - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {getcurrentbranch}') - return_code_local = -1 - localdefault = '' + return_code_local, localdefault = facade_helper.run_git_command( + getcurrentbranch, + timeout=60, # 1 minute for branch query + capture_output=True, + operation_description='get current branch' + ) facade_helper.log_activity( 'Verbose', f'remote default is: {remotedefault}, and localdefault is {localdefault}.') @@ -511,50 +451,32 @@ def git_repo_updates(facade_helper, repo_git): cmd_checkout_default = ( f"git -C {absolute_path} checkout {remotedefault}") - try: - result = subprocess.run( - cmd_checkout_default, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git checkout - check=False - ) - cmd_checkout_default_wait = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmd_checkout_default}') - cmd_checkout_default_wait = -1 + cmd_checkout_default_wait, _ = facade_helper.run_git_command( + cmd_checkout_default, + timeout=600, # 10 minutes for git checkout + capture_output=False, + operation_description=f'git checkout {remotedefault}' + ) cmdpull2 = (f"git -C {absolute_path} pull") cmd_reset = (f"git -C 
{absolute_path} reset --hard origin/{remotedefault}") - try: - result = subprocess.run( - cmd_reset, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=300, # 5 minutes for git reset - check=False - ) - cmd_reset_wait = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmd_reset}') - cmd_reset_wait = -1 + cmd_reset_wait, _ = facade_helper.run_git_command( + cmd_reset, + timeout=300, # 5 minutes for git reset + capture_output=False, + operation_description=f'git reset --hard origin/{remotedefault}' + ) cmd_clean = (f"git -C {absolute_path} clean -df") - try: - result = subprocess.run( - cmd_clean, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=300, # 5 minutes for git clean - check=False - ) - return_code_clean = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmd_clean}') - return_code_clean = -1 + return_code_clean, _ = facade_helper.run_git_command( + cmd_clean, + timeout=300, # 5 minutes for git clean + capture_output=False, + operation_description='git clean -df' + ) except Exception as e: @@ -564,18 +486,12 @@ def git_repo_updates(facade_helper, repo_git): cmdpull2 = (f"git -C {absolute_path} pull") print(cmdpull2) - try: - result = subprocess.run( - cmdpull2, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=600, # 10 minutes for git pull - check=False - ) - return_code = result.returncode - except subprocess.TimeoutExpired: - facade_helper.log_activity('Error', f'Git operation timed out: {cmdpull2}') - return_code = -1 # Timeout error code + return_code, _ = facade_helper.run_git_command( + cmdpull2, + timeout=600, # 10 minutes for git pull + capture_output=False, + operation_description=f'git pull {repo.repo_git}' + ) attempt += 1 diff --git a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py index 6df720584c..92546002ae 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py @@ -105,22 +105,22 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name): return f"{repo_base_dir}{repo_id}-{repo_path}/{repo_name}" -def get_parent_commits_set(absolute_repo_path, logger=None): +def get_parent_commits_set(absolute_repo_path, facade_helper, logger=None): cmd = "git --git-dir %s log --ignore-missing --pretty=format:'%%H'" % (absolute_repo_path) - try: - result = subprocess.run( - cmd, shell=True, - capture_output=True, - encoding='utf-8', errors='replace', # Handle non-UTF-8 gracefully - timeout=600, # 10 minutes for git log - check=False - ) - parent_commits = set(result.stdout.split(os.linesep)) - except subprocess.TimeoutExpired: - if logger: - logger.error(f"Git log timed out for repo: {absolute_repo_path}") - parent_commits = set() # Return empty set on timeout + + # Use facade_helper's unified git command runner + return_code, stdout = facade_helper.run_git_command( + cmd, + timeout=600, # 10 minutes for git log + capture_output=True, + operation_description=f'git log for {absolute_repo_path}' + ) + + if return_code == 0: + parent_commits = set(stdout.split(os.linesep)) + else: + parent_commits = set() # Return empty set on timeout or error # If there are no commits in the range, we still get a blank entry in # the set. 
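From patch 043 onward, every git invocation goes through `FacadeHelper.run_git_command`, which returns `(return_code, stdout)` and maps a timeout to `-1`. A simplified, standalone sketch of that wrapper and of the reworked `get_parent_commits_set` built on top of it; the `GitRunner` class here is an assumption for self-containment, while the method and function names follow the patch:

import os
import subprocess

class GitRunner:
    """Simplified stand-in for FacadeHelper's run_git_command wrapper."""

    def __init__(self, log=print):
        self.log = log

    def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False,
                        operation_description: str | None = None) -> tuple[int, str]:
        description = operation_description or cmd
        try:
            if capture_output:
                result = subprocess.run(cmd, shell=True, capture_output=True,
                                        encoding="utf-8", errors="replace",
                                        timeout=timeout, check=False)
                return result.returncode, result.stdout.strip()
            result = subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL,
                                    stderr=subprocess.DEVNULL, timeout=timeout, check=False)
            return result.returncode, ""
        except subprocess.TimeoutExpired:
            self.log(f"Git operation timed out: {description}")
            return -1, ""

def get_parent_commits_set(git_dir: str, runner: GitRunner) -> set[str]:
    """Collect the hashes reachable from HEAD, or an empty set on error/timeout."""
    cmd = f"git --git-dir {git_dir} log --ignore-missing --pretty=format:'%H'"
    return_code, stdout = runner.run_git_command(cmd, timeout=600, capture_output=True)
    parents = set(stdout.split(os.linesep)) if return_code == 0 else set()
    parents.discard("")   # an empty log still yields one blank entry; drop it
    return parents

Centralizing the timeout, logging, and output handling in one method is what lets the later patch collapse the repeated try/except blocks in repofetch.py into single calls.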
Remove it, as it messes with the calculations From 7bf42a3736d6d296990d0292708325e7adb4b023 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Mon, 1 Dec 2025 10:01:56 -0500 Subject: [PATCH 044/104] refactor subprocess.run calls in FacadeHelper to use common options Signed-off-by: Shlok Gilda --- .../facade_worker/facade_worker/config.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index 6f9cd2cc98..09f3c9d6ca 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -279,23 +279,28 @@ def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False, operation_description = cmd try: + # Common options for all subprocess.run calls + run_options = { + 'shell': True, + 'timeout': timeout, + 'check': False + } + + # Add capture_output-specific options + if capture_output: + run_options['capture_output'] = True + run_options['encoding'] = 'utf-8' + run_options['errors'] = 'replace' + else: + run_options['stdout'] = subprocess.DEVNULL + run_options['stderr'] = subprocess.DEVNULL + + result = subprocess.run(cmd, **run_options) + + # Return appropriate output based on capture_output flag if capture_output: - result = subprocess.run( - cmd, shell=True, - capture_output=True, - encoding='utf-8', errors='replace', - timeout=timeout, - check=False - ) return result.returncode, result.stdout.strip() else: - result = subprocess.run( - cmd, shell=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - timeout=timeout, - check=False - ) return result.returncode, '' except subprocess.TimeoutExpired: self.log_activity('Error', f'Git operation timed out: {operation_description}') From 1ccc8dd1e51ee2776bcda331d92725ce21da51ba Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 1 Dec 2025 15:29:49 -0500 Subject: [PATCH 045/104] Pylint and other style fixes Signed-off-by: Adrian Edwards --- augur/tasks/git/facade_tasks.py | 2 +- augur/tasks/github/facade_github/tasks.py | 1 - augur/tasks/github/issues.py | 2 +- augur/tasks/github/pull_requests/tasks.py | 34 ++++------------------- 4 files changed, 7 insertions(+), 32 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index d5ead38d14..8303aab5b1 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -255,7 +255,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: facade_helper.log_activity('Debug',f"Commits missing from repo {repo_id}: {len(missing_commits)}") - if not len(missing_commits) or repo_id is None: + if missing_commits or repo_id is None: #session.log_activity('Info','Type of missing_commits: %s' % type(missing_commits)) return diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 73fd9a51b5..53a3d6648a 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -5,7 +5,6 @@ from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from augur.application.db.models import Contributor from augur.tasks.github.facade_github.core import * from augur.application.db.lib import execute_sql, 
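Patch 045's pylint-driven change replaces `len()`-based truth tests with plain truthiness checks. The guard it touches in `analyze_commits_in_parallel` is an early return when nothing is missing, so the idiomatic equivalent of the removed `if not len(missing_commits) or repo_id is None:` keeps the negation. A tiny illustration of that emptiness check:

def should_skip(missing_commits: list, repo_id: int | None) -> bool:
    """Early-exit guard: skip when there are no missing commits or no repo id."""
    # Pylint-friendly emptiness test; dropping the `not` would invert the guard.
    return not missing_commits or repo_id is None

print(should_skip([], 5))          # True  -> nothing to analyze
print(should_skip(["abc123"], 5))  # False -> proceed with analysis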
get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors from augur.application.db.lib import get_session, execute_session_query diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index aaca35ed5f..91e56deaf7 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -12,7 +12,7 @@ from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from augur.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo from augur.tasks.util.worker_util import remove_duplicate_dicts -from augur.application.db.models import Issue, IssueLabel, IssueAssignee, Contributor +from augur.application.db.models import Issue, IssueLabel, IssueAssignee from augur.application.config import get_development_flag from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_core_data_last_collected, batch_insert_contributors diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 88cb5afe21..f18a656a98 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -11,12 +11,12 @@ from augur.application.db.models import PullRequest, Message, PullRequestReview, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, PullRequestReviewMessageRef, Contributor, Repo from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from augur.application.db.lib import get_session, get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors +from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors from augur.application.db.util import execute_session_query from ..messages import process_github_comment_contributors from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_core_data_last_collected -from typing import Generator, List, Dict +from typing import List platform_id = 1 @@ -52,15 +52,15 @@ def collect_pull_requests(repo_git: str, full_collection: bool) -> int: total_count += len(all_data) all_data.clear() - if len(all_data): + if all_data: process_pull_requests(all_data, f"{owner}/{repo}: Github Pr task", repo_id, logger, augur_db) total_count += len(all_data) if total_count > 0: - return total_count - else: logger.debug(f"{owner}/{repo} has no pull requests") return 0 + + return total_count @@ -182,30 +182,6 @@ def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): pr_metadata_natural_keys, string_fields=pr_metadata_string_fields) - - - - - - - - - - - - - - - - - - - - - - - - def process_pull_request_review_contributor(pr_review: dict, tool_source: str, tool_version: str, data_source: str): # get contributor data and set pr cntrb_id From aac134ea9acb078b0c1bab083fb346737f0aebcd Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 12:13:25 -0500 Subject: [PATCH 046/104] remove three files that are entirely comments Signed-off-by: Adrian Edwards --- augur/application/db/models/augur_data_old.py | 2803 ----------------- .../db/models/augur_operations_old.py | 123 - augur/application/db/models/spdx_old.py | 525 --- 3 files changed, 3451 deletions(-) delete mode 100644 augur/application/db/models/augur_data_old.py delete mode 100644 
augur/application/db/models/augur_operations_old.py delete mode 100644 augur/application/db/models/spdx_old.py diff --git a/augur/application/db/models/augur_data_old.py b/augur/application/db/models/augur_data_old.py deleted file mode 100644 index 5a71e4ede8..0000000000 --- a/augur/application/db/models/augur_data_old.py +++ /dev/null @@ -1,2803 +0,0 @@ -# from augur.application.db.models.base import Base -# from sqlalchemy import ( -# Column, -# Integer, -# String, -# UniqueConstraint, -# ForeignKey, -# Text, -# Boolean, -# BigInteger, -# SmallInteger, -# Index, -# Float, -# func, -# Date, -# text, -# Numeric, -# PrimaryKeyConstraint, -# CHAR, -# TIMESTAMP, -# JSON, -# ) -# from sqlalchemy.dialects.postgresql import JSONB -# from sqlalchemy.orm import relationship - -# # TODO: look at how facade queries it and add index - -# # TODO: look at how facade queries it and add index -# class AnalysisLog(Base): -# analysis_log_id = Column(BigInteger, primary_key=True) -# repos_id = Column(Integer, nullable=False) -# status = Column(String(), nullable=False) -# date_attempted = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) - -# # this is an insert always table so it does not need a UniqueConstraint -# __tablename__ = "analysis_log" -# __table_args__ = (Index("repos_id", repos_id), {"schema": "augur_data"}) - - -# # TODO: Manually filled by creation script -# # TODO: Could revive this table_ - - -# class ChaossMetricStatus(Base): -# cms_id = Column(BigInteger, primary_key=True, nullable=False) -# cm_group = Column(String()) -# cm_source = Column(String()) -# cm_type = Column(String()) -# cm_backend_status = Column(String()) -# cm_frontend_status = Column(String()) -# cm_defined = Column(Boolean()) -# cm_api_endpoint_repo = Column(String()) -# cm_api_endpoint_rg = Column(String()) -# cm_name = Column(String()) -# cm_working_group = Column(String()) -# cm_info = Column(JSON()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# cm_working_group_focus_area = Column(String()) - -# __tablename__ = "chaoss_metric_status" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "This table used to track CHAOSS Metric implementations in Augur, but due to the constantly changing location of that information, it is for the moment not actively populated. ", -# } - - -# class CommitCommentRef(Base): -# cmt_comment_id = Column(BigInteger, primary_key=True, nullable=False) -# cmt_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.commits.cmt_id", -# name="fk_commit_comment_ref_commits_1", -# onupdate="CASCADE", -# ondelete="RESTRICT", -# ), -# nullable=False, -# ) -# repo_id = Column(BigInteger) -# msg_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.message.msg_id", -# name="fk_commit_comment_ref_message_1", -# onupdate="CASCADE", -# ondelete="RESTRICT", -# ), -# nullable=False, -# ) -# user_id = Column(BigInteger, nullable=False) -# body = Column(Text()) -# line = Column(BigInteger) -# position = Column(BigInteger) -# commit_comment_src_node_id = Column( -# String(), -# comment="For data provenance, we store the source node ID if it exists. ", -# ) -# cmt_comment_src_id = Column( -# BigInteger, -# nullable=False, -# comment="For data provenance, we store the source ID if it exists. 
", -# ) -# created_at = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# message = relationship("Message", back_populates="commit") -# commit = relationship("Commits", back_populates="msg_ref") - -# __tablename__ = "commit_comment_ref" -# __table_args__ = ( -# Index("comment_id", cmt_comment_src_id, cmt_comment_id, msg_id), -# # unique value for insertion -# UniqueConstraint("cmt_comment_src_id", name="commitcomment"), -# {"schema": "augur_data"}, -# ) - - -# # TODO: This table does not get used so remove it and test without - - -# class CommitParents(Base): -# cmt_id = Column( -# BigInteger, -# ForeignKey("augur_data.commits.cmt_id", name="fk_commit_parents_commits_1"), -# primary_key=True, -# ) -# parent_id = Column( -# BigInteger, -# ForeignKey("augur_data.commits.cmt_id", name="fk_commit_parents_commits_2"), -# primary_key=True, -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "commit_parents" -# __table_args__ = ( -# Index("commit_parents_ibfk_1", cmt_id), -# Index("commit_parents_ibfk_2", parent_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Add foriegn key: cmt_author_platform_username = Column(String(), ForeignKey('augur_data.contributors.cntrb_login', name='fk_commits_contributors_3', ondelete="CASCADE", onupdate="CASCADE")) -# # TODO: Add relationship with this foreign key -# class Commits(Base): -# cmt_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_commits_repo_2", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# cmt_commit_hash = Column(String(), nullable=False) -# cmt_author_name = Column(String(), nullable=False) -# cmt_author_raw_email = Column(String(), nullable=False) -# cmt_author_email = Column(String(), nullable=False) -# cmt_author_date = Column(String(), nullable=False) -# cmt_author_affiliation = Column(String(), server_default="NULL") -# cmt_committer_name = Column(String(), nullable=False) -# cmt_committer_raw_email = Column(String(), nullable=False) -# cmt_committer_email = Column(String(), nullable=False) -# cmt_committer_date = Column(String(), nullable=False) -# cmt_committer_affiliation = Column(String(), server_default="NULL") -# cmt_added = Column(Integer, nullable=False) -# cmt_removed = Column(Integer, nullable=False) -# cmt_whitespace = Column(Integer, nullable=False) -# cmt_filename = Column(String(), nullable=False) -# cmt_date_attempted = Column(TIMESTAMP(), nullable=False) -# cmt_ght_author_id = Column(Integer) -# cmt_ght_committer_id = Column(Integer) -# cmt_ght_committed_at = Column(TIMESTAMP()) -# cmt_committer_timestamp = Column(TIMESTAMP(timezone=True)) -# cmt_author_timestamp = Column(TIMESTAMP(timezone=True)) -# # TODO: Appears that this foreign key is duplicated in the database -# cmt_author_platform_username = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# msg_ref = relationship("CommitCommentRef", back_populates="commit") - -# def get_messages(self): 
- -# messages = [] -# for msg_ref in self.msg_ref: -# messages.append(msg_ref.message) - -# return messages - -# __tablename__ = "commits" -# __table_args__ = ( -# Index("author_affiliation", cmt_author_affiliation, postgresql_using="hash"), -# Index("author_cntrb_id", cmt_ght_author_id), -# Index( -# "author_email,author_affiliation,author_date", -# cmt_author_email, -# cmt_author_affiliation, -# cmt_author_date, -# ), -# Index("author_raw_email", cmt_author_raw_email), -# Index("cmt-author-date-idx2", cmt_author_date), -# Index( -# "cmt_author_contrib_worker", -# cmt_author_name, -# cmt_author_email, -# cmt_author_date, -# postgresql_using="brin", -# ), -# Index( -# "cmt_commiter_contrib_worker", -# cmt_committer_name, -# cmt_committer_email, -# cmt_committer_date, -# postgresql_using="brin", -# ), -# Index("commited", cmt_id), -# Index( -# "commits_idx_cmt_email_cmt_date_cmt_name", -# cmt_author_email, -# cmt_author_date, -# cmt_author_name, -# ), -# Index( -# "commits_idx_repo_id_cmt_ema_cmt_dat_cmt_nam", -# repo_id, -# cmt_author_email, -# cmt_author_date, -# cmt_author_name, -# ), -# Index( -# "commits_idx_repo_id_cmt_ema_cmt_dat_cmt_nam2", -# repo_id, -# cmt_committer_email, -# cmt_committer_date, -# cmt_committer_name, -# ), -# Index( -# "committer_affiliation", cmt_committer_affiliation, postgresql_using="hash" -# ), -# Index( -# "committer_email,committer_affiliation,committer_date", -# cmt_committer_email, -# cmt_committer_affiliation, -# cmt_committer_date, -# ), -# Index("committer_raw_email", cmt_committer_raw_email), -# Index("repo_id,commit", repo_id, cmt_commit_hash), -# { -# "schema": "augur_data", -# "comment": "Commits.\nEach row represents changes to one FILE within a single commit. So you will encounter multiple rows per commit hash in many cases. ", -# }, -# ) - - -# # Current has varchar with length but I changed that -# class ContributorAffiliations(Base): -# ca_id = Column(BigInteger, primary_key=True, nullable=False) -# ca_domain = Column(String(), nullable=False) -# ca_start_date = Column(Date, server_default="1970-01-01") -# ca_last_used = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# ca_affiliation = Column(String()) -# ca_active = Column(SmallInteger, server_default=text("1")) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "contributor_affiliations" -# __table_args__ = ( -# UniqueConstraint("ca_domain", name="unique_domain"), -# { -# "schema": "augur_data", -# "comment": "This table exists outside of relations with other tables. The purpose is to provide a dynamic, owner maintained (and augur augmented) list of affiliations. This table is processed in affiliation information in the DM_ tables generated when Augur is finished counting commits using the Facade Worker. ", -# }, -# ) - - -# # TODO: Add foreign key to repo table on cntrb_repo_id - - -# class ContributorRepo(Base): -# cntrb_repo_id = Column(BigInteger, nullable=False) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_contributor_repo_contributors_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# comment="This is not null because what is the point without the contributor in this table? 
", -# ) -# repo_git = Column( -# String(), -# nullable=False, -# comment="Similar to cntrb_id, we need this data for the table to have meaningful data. ", -# ) -# repo_name = Column(String(), nullable=False) -# gh_repo_id = Column(BigInteger, nullable=False) -# cntrb_category = Column(String()) -# event_id = Column(BigInteger) -# created_at = Column(TIMESTAMP()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "contributor_repo" -# __table_args__ = ( -# PrimaryKeyConstraint("cntrb_repo_id", name="cntrb_repo_id_key"), -# UniqueConstraint("event_id", "tool_version", name="eventer"), -# {"schema": "augur_data"}, -# ) - - -# class Contributors(Base): -# cntrb_id = Column(BigInteger, primary_key=True, nullable=False) -# cntrb_login = Column( -# String(), -# comment="Will be a double population with the same value as gh_login for github, but the local value for other systems. ", -# ) -# cntrb_email = Column( -# String(), -# comment="This needs to be here for matching contributor ids, which are augur, to the commit information. ", -# ) -# cntrb_full_name = Column(String()) -# cntrb_company = Column(String()) -# cntrb_created_at = Column(TIMESTAMP()) -# cntrb_type = Column( -# String(), -# comment="Present in another models. It is not currently used in Augur. ", -# ) -# cntrb_fake = Column(SmallInteger, server_default=text("0")) -# cntrb_deleted = Column(SmallInteger, server_default=text("0")) -# cntrb_long = Column(Numeric(precision=11, scale=8)) -# cntrb_lat = Column(Numeric(precision=10, scale=8)) -# cntrb_country_code = Column(CHAR(length=3)) -# cntrb_state = Column(String()) -# cntrb_city = Column(String()) -# cntrb_location = Column(String()) -# cntrb_canonical = Column(String()) -# cntrb_last_used = Column(TIMESTAMP(timezone=True)) -# gh_user_id = Column(BigInteger) -# gh_login = Column( -# String(), -# comment="populated with the github user name for github originated data. 
", -# ) -# gh_url = Column(String()) -# gh_html_url = Column(String()) -# gh_node_id = Column(String()) -# gh_avatar_url = Column(String()) -# gh_gravatar_id = Column(String()) -# gh_followers_url = Column(String()) -# gh_following_url = Column(String()) -# gh_gists_url = Column(String()) -# gh_starred_url = Column(String()) -# gh_subscriptions_url = Column(String()) -# gh_organizations_url = Column(String()) -# gh_repos_url = Column(String()) -# gh_events_url = Column(String()) -# gh_received_events_url = Column(String()) -# gh_type = Column(String()) -# gh_site_admin = Column(String()) -# gl_web_url = Column(String()) -# gl_avatar_url = Column(String()) -# gl_state = Column(String()) -# gl_username = Column(String()) -# gl_full_name = Column(String()) -# gl_id = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# repos_contributed = relationship("ContributorRepo") -# aliases = relationship("ContributorsAliases") -# messages = relationship("Message") - -# __tablename__ = "contributors" -# __table_args__ = ( -# UniqueConstraint( -# "gh_login", name="GH-UNIQUE-C", initially="DEFERRED", deferrable=True -# ), -# UniqueConstraint( -# "gl_id", name="GL-UNIQUE-B", initially="DEFERRED", deferrable=True -# ), -# # unique key for gitlab users on insertion -# UniqueConstraint( -# "gl_username", name="GL-UNIQUE-C", initially="DEFERRED", deferrable=True -# ), -# # unique key to insert on for github -# UniqueConstraint("cntrb_login", name="GL-cntrb-LOGIN-UNIQUE"), -# Index("cnt-fullname", cntrb_full_name, postgresql_using="hash"), -# Index("cntrb-theemail", cntrb_email, postgresql_using="hash"), -# Index("cntrb_canonica-idx11", cntrb_canonical), -# Index("cntrb_login_platform_index", cntrb_login), -# Index( -# "contributor_delete_finder", cntrb_id, cntrb_email, postgresql_using="brin" -# ), -# Index("contributor_worker_email_finder", cntrb_email, postgresql_using="brin"), -# Index( -# "contributor_worker_finder", -# cntrb_login, -# cntrb_email, -# cntrb_id, -# postgresql_using="brin", -# ), -# # TODO: This index is the same as the first one but one has a different stuff -# Index( -# "contributor_worker_fullname_finder", -# cntrb_full_name, -# postgresql_using="brin", -# ), -# Index("contributors_idx_cntrb_email3", cntrb_email), -# # TODO: These last onese appear to be the same -# Index("login", cntrb_login), -# Index("login-contributor-idx", cntrb_login), -# { -# "schema": "augur_data", -# "comment": "For GitHub, this should be repeated from gh_login. for other systems, it should be that systems login.\nGithub now allows a user to change their login name, but their user id remains the same in this case. So, the natural key is the combination of id and login, but there should never be repeated logins. 
", -# }, -# ) - - -# class ContributorsAliases(Base): -# cntrb_alias_id = Column(BigInteger, primary_key=True, nullable=False) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_contributors_aliases_contributors_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# nullable=False, -# ) -# canonical_email = Column(String(), nullable=False) -# alias_email = Column(String(), nullable=False) -# cntrb_active = Column(SmallInteger, nullable=False, server_default=text("1")) -# cntrb_last_modified = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "contributors_aliases" -# __table_args__ = ( -# UniqueConstraint( -# "alias_email", -# "canonical_email", -# name="only-email-once", -# initially="DEFERRED", -# deferrable=True, -# ), -# { -# "schema": "augur_data", -# "comment": "Every open source user may have more than one email used to make contributions over time. Augur selects the first email it encounters for a user as its “canonical_email”. \n\nThe canonical_email is also added to the contributors_aliases table, with the canonical_email and alias_email being identical. Using this strategy, an email search will only need to join the alias table for basic email information, and can then more easily map the canonical email from each alias row to the same, more detailed information in the contributors table for a user. ", -# }, -# ) - - -# # TODO: Add relationship: Don't understand table well enough -# class DiscourseInsights(Base): -# msg_discourse_id = Column(BigInteger, primary_key=True, nullable=False) -# msg_id = Column( -# BigInteger, -# ForeignKey("augur_data.message.msg_id", name="fk_discourse_insights_message_1"), -# ) -# discourse_act = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "discourse_insights" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "This table is populated by the “Discourse_Analysis_Worker”. It examines sequential discourse, using computational linguistic methods, to draw statistical inferences regarding the discourse in a particular comment thread. 
", -# } - - -# # TODO: Add foreign keys to repo and repogroups - - -# class DmRepoAnnual(Base): -# dm_repo_annual_id = Column(BigInteger, primary_key=True) -# repo_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_annual" -# __table_args__ = ( -# Index("repo_id,affiliation_copy_1", repo_id, affiliation), -# Index("repo_id,email_copy_1", repo_id, email), -# {"schema": "augur_data"}, -# ) - - -# class DmRepoGroupAnnual(Base): -# dm_repo_group_annual_id = Column(BigInteger, primary_key=True) -# repo_group_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_group_annual" -# __table_args__ = ( -# Index("projects_id,affiliation_copy_1", repo_group_id, affiliation), -# Index("projects_id,email_copy_1", repo_group_id, email), -# {"schema": "augur_data"}, -# ) - - -# class DmRepoGroupMonthly(Base): -# dm_repo_group_monthly_id = Column(BigInteger, primary_key=True) -# repo_group_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# month = Column(SmallInteger, nullable=False) -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_group_monthly" -# __table_args__ = ( -# Index("projects_id,affiliation_copy_2", repo_group_id, affiliation), -# Index("projects_id,email_copy_2", repo_group_id, email), -# Index("projects_id,year,affiliation_copy_1", repo_group_id, year, affiliation), -# Index("projects_id,year,email_copy_1", repo_group_id, year, email), -# {"schema": "augur_data"}, -# ) - - -# class DmRepoGroupWeekly(Base): -# dm_repo_group_weekly_id = Column(BigInteger, primary_key=True) -# repo_group_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# week = Column(SmallInteger, nullable=False) -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = 
Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_group_weekly" -# __table_args__ = ( -# Index("projects_id,affiliation", repo_group_id, affiliation), -# Index("projects_id,email", repo_group_id, email), -# Index("projects_id,year,affiliation", repo_group_id, year, affiliation), -# Index("projects_id,year,email", repo_group_id, year, email), -# {"schema": "augur_data"}, -# ) - - -# class DmRepoMonthly(Base): -# dm_repo_monthly_id = Column(BigInteger, primary_key=True) -# repo_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# month = Column(SmallInteger, nullable=False) -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_monthly" -# __table_args__ = ( -# Index("repo_id,affiliation_copy_2", repo_id, affiliation), -# Index("repo_id,email_copy_2", repo_id, email), -# Index("repo_id,year,affiliation_copy_1", repo_id, year, affiliation), -# Index("repo_id,year,email_copy_1", repo_id, year, email), -# {"schema": "augur_data"}, -# ) - - -# class DmRepoWeekly(Base): -# dm_repo_weekly_id = Column(BigInteger, primary_key=True) -# repo_id = Column(BigInteger, nullable=False) -# email = Column(String(), nullable=False) -# affiliation = Column(String(), server_default="NULL") -# week = Column(SmallInteger, nullable=False) -# year = Column(SmallInteger, nullable=False) -# added = Column(BigInteger, nullable=False) -# removed = Column(BigInteger, nullable=False) -# whitespace = Column(BigInteger, nullable=False) -# files = Column(BigInteger, nullable=False) -# patches = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "dm_repo_weekly" -# __table_args__ = ( -# Index("repo_id,affiliation", repo_id, affiliation), -# Index("repo_id,email", repo_id, email), -# Index("repo_id,year,affiliation", repo_id, year, affiliation), -# Index("repo_id,year,email", repo_id, year, email), -# {"schema": "augur_data"}, -# ) - - -# class Exclude(Base): -# id = Column(Integer, primary_key=True, nullable=False) -# projects_id = Column(Integer, nullable=False) -# email = Column(String(), server_default="NULL") -# domain = Column(String(), server_default="NULL") - -# __tablename__ = "exclude" -# __table_args__ = {"schema": "augur_data"} - - -# # TODO: Add relationship for repo_id: I don't think the repo_id should be in this table, I think that behavior can be obtained by getting all the issues for a repo then all the issue assignees for those issues -# # TODO: Add relationship for cntrb_id -# class IssueAssignees(Base): -# issue_assignee_id = 
Column(BigInteger, primary_key=True, nullable=False) -# issue_id = Column( -# BigInteger, -# ForeignKey("augur_data.issues.issue_id", name="fk_issue_assignees_issues_1"), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_issue_assignee_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", name="fk_issue_assignees_contributors_1" -# ), -# ) -# issue_assignee_src_id = Column( -# BigInteger, -# comment="This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue events API in the issue_assignees embedded JSON object. We may discover it is an ID for the person themselves; but my hypothesis is that its not.", -# ) -# issue_assignee_src_node = Column( -# String(), -# comment="This character based identifier comes from the source. In the case of GitHub, it is the id that is the second field returned from the issue events API in the issue_assignees embedded JSON object. We may discover it is an ID for the person themselves; but my hypothesis is that its not.", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "issue_assignees" -# __table_args__ = ( -# Index("issue-cntrb-assign-idx-1", cntrb_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Add relationship for repo_id: I don't think the repo_id should be in this table, I think that behavior can be obtained by getting all the issues for a repo then all the issue assignees for those issues -# # TODO: Add relationship for cntrb_id - - -# class IssueEvents(Base): -# event_id = Column(BigInteger, primary_key=True, nullable=False) -# issue_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.issues.issue_id", -# name="fk_issue_events_issues_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_issue_events_repo", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_issue_events_contributors_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# action = Column(String(), nullable=False) -# action_commit_hash = Column(String()) -# created_at = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# node_id = Column( -# String(), -# comment="This should be renamed to issue_event_src_node_id, as its the varchar identifier in GitHub and likely common in other sources as well. However, since it was created before we came to this naming standard and workers are built around it, we have it simply named as node_id. Anywhere you see node_id in the schema, it comes from GitHubs terminology.", -# ) -# node_url = Column(String()) -# issue_event_src_id = Column( -# BigInteger, -# comment="This ID comes from the source. 
In the case of GitHub, it is the id that is the first field returned from the issue events API", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# platform_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.platform.pltfrm_id", -# name="fk_issue_event_platform_ide", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) - -# __tablename__ = "issue_events" -# __table_args__ = ( -# # contstraint to determine whether to insert or not -# UniqueConstraint("issue_id", "issue_event_src_id", name="unique_event_id_key"), -# Index("issue-cntrb-idx2", issue_event_src_id), -# Index("issue_events_ibfk_1", issue_id), -# Index("issue_events_ibfk_2", cntrb_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Add relationship for repo_id: I don't think the repo_id should be in this table, I think that behavior can be obtained by getting all the issues for a repo then all the issue assignees for those issues - - -# class IssueLabels(Base): -# issue_label_id = Column(BigInteger, primary_key=True, nullable=False) -# issue_id = Column( -# BigInteger, -# ForeignKey("augur_data.issues.issue_id", name="fk_issue_labels_issues_1"), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_issue_labels_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# label_text = Column(String()) -# label_description = Column(String()) -# label_color = Column(String()) -# label_src_id = Column( -# BigInteger, -# comment="This character based identifier (node) comes from the source. In the case of GitHub, it is the id that is the second field returned from the issue events API JSON subsection for issues.", -# ) -# label_src_node_id = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "issue_labels" -# __table_args__ = ( -# # insert on -# UniqueConstraint("label_src_id", "issue_id", name="unique_issue_label"), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Add replationship: for repo_id - - -# class IssueMessageRef(Base): -# issue_msg_ref_id = Column(BigInteger, primary_key=True, nullable=False) -# issue_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.issues.issue_id", -# name="fk_issue_message_ref_issues_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_repo_id_fk1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# msg_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.message.msg_id", -# name="fk_issue_message_ref_message_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# issue_msg_ref_src_node_id = Column( -# String(), -# comment="This character based identifier comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue comments API", -# ) -# issue_msg_ref_src_comment_id = Column( -# BigInteger, -# comment="This ID comes from the source. 
In the case of GitHub, it is the id that is the first field returned from the issue comments API", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# message = relationship("Message", back_populates="issue") -# issue = relationship("Issues", back_populates="msg_ref") - -# __tablename__ = "issue_message_ref" -# __table_args__ = ( -# # insert on -# UniqueConstraint( -# "issue_msg_ref_src_comment_id", "tool_source", name="repo-issue" -# ), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Add relationship for cntrb_id -# # should repo_id be allowed to be NULL? - - -# class Issues(Base): -# issue_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_issues_repo", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# reporter_id = Column( -# BigInteger, -# ForeignKey("augur_data.contributors.cntrb_id", name="fk_issues_contributors_2"), -# comment="The ID of the person who opened the issue. ", -# ) -# pull_request = Column(BigInteger) -# pull_request_id = Column(BigInteger) -# created_at = Column(TIMESTAMP()) -# issue_title = Column(String()) -# issue_body = Column(String()) -# cntrb_id = Column( -# BigInteger, -# ForeignKey("augur_data.contributors.cntrb_id", name="fk_issues_contributors_1"), -# comment="The ID of the person who closed the issue. ", -# ) -# comment_count = Column(BigInteger) -# updated_at = Column(TIMESTAMP()) -# closed_at = Column(TIMESTAMP()) -# due_on = Column(TIMESTAMP()) -# repository_url = Column(String()) -# issue_url = Column(String()) -# labels_url = Column(String()) -# comments_url = Column(String()) -# events_url = Column(String()) -# html_url = Column(String()) -# issue_state = Column(String()) -# issue_node_id = Column(String()) -# gh_issue_number = Column(BigInteger) -# gh_issue_id = Column(BigInteger) -# gh_user_id = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# assignees = relationship("IssueAssignees") -# events = relationship("IssueEvents") -# labels = relationship("IssueLabels") - -# msg_ref = relationship("IssueMessageRef", back_populates="issue") - -# def get_messages(self): - -# messages = [] -# for msg_ref in self.msg_ref: -# messages.append(msg_ref.message) - -# return messages - -# __tablename__ = "issues" -# __table_args__ = ( -# Index("issue-cntrb-dix2", cntrb_id), -# Index("issues_ibfk_1", repo_id), -# Index("issues_ibfk_2", reporter_id), -# Index("issues_ibfk_4", pull_request_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Should latest_release_timestamp be a timestamp -# class Libraries(Base): -# library_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_libraries_repo_1"), -# ) -# platform = Column(String()) -# name = Column(String()) -# created_timestamp = Column(TIMESTAMP()) -# updated_timestamp = Column(TIMESTAMP()) -# library_description = Column(String()) -# keywords = Column(String()) -# library_homepage = Column(String()) -# license = Column(String()) -# version_count = Column(Integer) -# latest_release_timestamp = Column(String()) -# latest_release_number = Column(String()) -# package_manager_id = Column(String()) -# 
dependency_count = Column(Integer) -# dependent_library_count = Column(Integer) -# primary_language = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# library_dependencies = relationship("LibraryDependecies") - -# # TODO: Should this be a one to one relationship with library version (this it what I defined it as)? -# library_version = relationship("LibraryVersion", back_populates="library") - -# __tablename__ = "libraries" -# __table_args__ = {"schema": "augur_data"} - - -# class LibraryDependecies(Base): -# lib_dependency_id = Column(BigInteger, primary_key=True, nullable=False) -# library_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.libraries.library_id", -# name="fk_library_dependencies_libraries_1", -# ), -# ) -# manifest_platform = Column(String()) -# manifest_filepath = Column(String()) -# manifest_kind = Column(String()) -# repo_id_branch = Column(String(), nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "library_dependencies" -# __table_args__ = (Index("REPO_DEP", library_id), {"schema": "augur_data"}) - - -# class LibraryVersion(Base): -# library_version_id = Column(BigInteger, primary_key=True, nullable=False) -# library_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.libraries.library_id", name="fk_library_version_libraries_1" -# ), -# ) -# library_platform = Column(String()) -# version_number = Column(String()) -# version_release_date = Column(TIMESTAMP()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# library = relationship("Libraries", back_populates="library_version") - -# __tablename__ = "library_version" -# __table_args__ = {"schema": "augur_data"} - - -# class LstmAnomalyModels(Base): -# model_id = Column(BigInteger, primary_key=True, nullable=False) -# model_name = Column(String()) -# model_description = Column(String()) -# look_back_days = Column(BigInteger) -# training_days = Column(BigInteger) -# batch_size = Column(BigInteger) -# metric = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# # TODO: Should this be a one to one relationship? -# model_result = relationship("LstmAnomalyResults") - -# __tablename__ = "lstm_anomaly_models" -# __table_args__ = {"schema": "augur_data"} - - -# class LstmAnomalyResults(Base): -# result_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_lstm_anomaly_results_repo_1"), -# ) -# repo_category = Column(String()) -# model_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.lstm_anomaly_models.model_id", -# name="fk_lstm_anomaly_results_lstm_anomaly_models_1", -# ), -# ) -# metric = Column(String()) -# contamination_factor = Column(Float()) -# mean_absolute_error = Column(Float()) -# remarks = Column(String()) -# metric_field = Column( -# String(), -# comment="This is a listing of all of the endpoint fields included in the generation of the metric. 
Sometimes there is one, sometimes there is more than one. This will list them all. ", -# ) -# mean_absolute_actual_value = Column(Float()) -# mean_absolute_prediction_value = Column(Float()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "lstm_anomaly_results" -# __table_args__ = {"schema": "augur_data"} - - -# # TODO: I don't think that repo_id needs to be included because this behavior could be achieved by Repo.ParentObj.msg_ref.message (ParentObj is things such as prs or issues) -# # TODO: Add relationship to repo group list serve table - - -# class Message(Base): -# msg_id = Column(BigInteger, primary_key=True, nullable=False) -# rgls_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo_groups_list_serve.rgls_id", -# name="fk_message_repo_groups_list_serve_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# platform_msg_id = Column(BigInteger) -# platform_node_id = Column(String()) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_message_repoid", -# ondelete="CASCADE", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_message_contributors_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# comment="Not populated for mailing lists. Populated for GitHub issues. ", -# ) -# msg_text = Column(String()) -# msg_timestamp = Column(TIMESTAMP()) -# msg_sender_email = Column(String()) -# msg_header = Column(String()) -# pltfrm_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.platform.pltfrm_id", -# name="fk_message_platform_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# # Used this thread to determine how to do one to many relationship with an extra middle table: https://stackoverflow.com/questions/35795717/flask-sqlalchemy-many-to-many-relationship-with-extra-field -# commit = relationship("CommitCommentRef", back_populates="message") -# issue = relationship("IssueMessageRef", back_populates="message") -# pull_request = relationship("PullRequestMessageRef", back_populates="message") -# pr_review = relationship("PullRequestReviewMessageRef", back_populates="message") - -# analysis = relationship("MessageAnalysis", back_populates="message") -# sentiment = relationship("MessageSentiment", back_populates="message") - -# __tablename__ = "message" -# __table_args__ = ( -# UniqueConstraint("platform_msg_id", "tool_source", name="gh-message"), -# Index("messagegrouper", msg_id, rgls_id, unique=True), -# Index("msg-cntrb-id-idx", cntrb_id), -# Index("platformgrouper", msg_id, pltfrm_id), -# {"schema": "augur_data"}, -# ) - - -# class MessageAnalysis(Base): -# msg_analysis_id = Column(BigInteger, primary_key=True, nullable=False) -# msg_id = Column( -# BigInteger, -# ForeignKey("augur_data.message.msg_id", name="fk_message_analysis_message_1"), -# ) -# worker_run_id = Column( -# BigInteger, -# comment="This column is used to indicate analyses run by a worker during the same execution period, and is useful for grouping, and time series analysis. 
", -# ) -# sentiment_score = Column( -# Float(), -# comment="A sentiment analysis score. Zero is neutral, negative numbers are negative sentiment, and positive numbers are positive sentiment. ", -# ) -# reconstruction_error = Column( -# Float(), -# comment="Each message is converted to a 250 dimensin doc2vec vector, so the reconstruction error is the difference between what the predicted vector and the actual vector.", -# ) -# novelty_flag = Column( -# Boolean(), -# comment="This is an analysis of the degree to which the message is novel when compared to other messages in a repository. For example when bots are producing numerous identical messages, the novelty score is low. It would also be a low novelty score when several people are making the same coment. ", -# ) -# feeck_flag = Column( -# Boolean(), -# comment="This exists to provide the user with an opportunity provide feeck on the resulting the sentiment scores. ", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# message = relationship("Message", back_populates="analysis") - -# __tablename__ = "message_analysis" -# __table_args__ = {"schema": "augur_data"} - - -# class MessageAnalysisSummary(Base): -# msg_summary_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", name="fk_message_analysis_summary_repo_1" -# ), -# ) -# worker_run_id = Column(BigInteger) -# positive_ratio = Column(Float()) -# negative_ratio = Column(Float()) -# novel_count = Column(BigInteger) -# period = Column(TIMESTAMP()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# # TODO: Ensure that this is a one to one relationship -# repo = relationship("Repo", back_populates="msg_analysis_summary") - -# __tablename__ = "message_analysis_summary" -# __table_args__ = {"schema": "augur_data"} - - -# class MessageSentiment(Base): -# msg_analysis_id = Column(BigInteger, primary_key=True, nullable=False) -# msg_id = Column( -# BigInteger, -# ForeignKey("augur_data.message.msg_id", name="fk_message_sentiment_message_1"), -# ) -# worker_run_id = Column( -# BigInteger, -# comment="This column is used to indicate analyses run by a worker during the same execution period, and is useful for grouping, and time series analysis. ", -# ) -# sentiment_score = Column( -# Float(), -# comment="A sentiment analysis score. Zero is neutral, negative numbers are negative sentiment, and positive numbers are positive sentiment. ", -# ) -# reconstruction_error = Column( -# Float(), -# comment="Each message is converted to a 250 dimensin doc2vec vector, so the reconstruction error is the difference between what the predicted vector and the actual vector.", -# ) -# novelty_flag = Column( -# Boolean(), -# comment="This is an analysis of the degree to which the message is novel when compared to other messages in a repository. For example when bots are producing numerous identical messages, the novelty score is low. It would also be a low novelty score when several people are making the same coment. ", -# ) -# feedback = Column( -# Boolean(), -# comment="This exists to provide the user with an opportunity provide feedback on the resulting the sentiment scores. 
", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# message = relationship("Message", back_populates="sentiment") - -# __tablename__ = "message_sentiment" -# __table_args__ = {"schema": "augur_data"} - - -# class MessageSentimentSummary(Base): -# msg_summary_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", name="fk_message_sentiment_summary_repo_1" -# ), -# ) -# worker_run_id = Column( -# BigInteger, -# comment='This value should reflect the worker_run_id for the messages summarized in the table. There is not a relation between these two tables for that purpose because its not *really*, relationaly a concept unless we create a third table for "worker_run_id", which we determined was unnecessarily complex. ', -# ) -# positive_ratio = Column(Float()) -# negative_ratio = Column(Float()) -# novel_count = Column( -# BigInteger, -# comment="The number of messages identified as novel during the analyzed period", -# ) -# period = Column( -# TIMESTAMP(), -# comment="The whole timeline is divided into periods based on the definition of time period for analysis, which is user specified. Timestamp of the first period to look at, until the end of messages at the data of execution. ", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# repo = relationship("Repo", back_populates="msg_sentiment_summary") - -# __tablename__ = "message_sentiment_summary" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "In a relationally perfect world, we would have a table called “message_sentiment_run” the incremented the “worker_run_id” for both message_sentiment and message_sentiment_summary. For now, we decided this was overkill. 
", -# } - - -# class Platform(Base): -# pltfrm_id = Column(BigInteger, nullable=False) -# pltfrm_name = Column(String()) -# pltfrm_version = Column(String()) -# pltfrm_release_date = Column(Date) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# pr_reviews = relationship("PullRequestReviews") - -# __tablename__ = "platform" -# __table_args__ = ( -# PrimaryKeyConstraint("pltfrm_id", name="theplat"), -# Index("plat", pltfrm_id, unique=True), -# {"schema": "augur_data"}, -# ) - - -# class PullRequestAnalysis(Base): -# pull_request_analysis_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_analysis_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# comment="It would be better if the pull request worker is run first to fetch the latest PRs before analyzing", -# ) -# merge_probability = Column( -# Numeric(precision=256, scale=250), -# comment="Indicates the probability of the PR being merged", -# ) -# mechanism = Column( -# String(), -# comment="the ML model used for prediction (It is XGBoost Classifier at present)", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) - -# pull_request = relationship("PullRequests", back_populates="analysis") - -# __tablename__ = "pull_request_analysis" -# __table_args__ = ( -# Index("pr_anal_idx", pull_request_id), -# Index("probability_idx", merge_probability.desc().nullslast()), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.assignees -# # TODO: Add relationship for cntrb_id - - -# class PullRequestAssignees(Base): -# pr_assignee_map_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_assignees_pull_requests_1", -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_request_assignees_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# contrib_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_assignees_contributors_1", -# ), -# ) -# pr_assignee_src_id = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_assignees" -# __table_args__ = ( -# Index("pr_meta_cntrb-idx", contrib_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.commits -# # TODO: Add relationship for cntrb_id -# class PullRequestCommits(Base): -# pr_cmt_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_commits_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# repo_id = 
Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_request_commits_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# pr_cmt_sha = Column( -# String(), -# comment="This is the commit SHA for a pull request commit. If the PR is not to the master branch of the main repository (or, in rare cases, from it), then you will NOT find a corresponding commit SHA in the commit table. (see table comment for further explanation). ", -# ) -# pr_cmt_node_id = Column(String()) -# pr_cmt_message = Column(String()) -# pr_cmt_comments_url = Column(String()) -# pr_cmt_author_cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pr_commit_cntrb_id", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# pr_cmt_timestamp = Column(TIMESTAMP()) -# pr_cmt_author_email = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_commits" -# __table_args__ = ( -# UniqueConstraint( -# "pull_request_id", "repo_id", "pr_cmt_sha", name="pr_commit_nk" -# ), -# { -# "schema": "augur_data", -# "comment": "Pull request commits are an enumeration of each commit associated with a pull request. \nNot all pull requests are from a branch or fork into master. \nThe commits table intends to count only commits that end up in the master branch (i.e., part of the deployed code base for a project).\nTherefore, there will be commit “SHA”’s in this table that are no associated with a commit SHA in the commits table. \nIn cases where the PR is to the master branch of a project, you will find a match. In cases where the PR does not involve the master branch, you will not find a corresponding commit SHA in the commits table. This is expected. ", -# }, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.events -# # TODO: Add relationship for cntrb_id - - -# class PullRequestEvents(Base): -# pr_event_id = Column(BigInteger, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_events_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fkprevent_repo_id", -# ondelete="RESTRICT", -# onupdate="RESTRICT", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_events_contributors_1", -# ), -# nullable=False, -# ) -# action = Column(String(), nullable=False) -# action_commit_hash = Column(String()) -# created_at = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# issue_event_src_id = Column( -# BigInteger, -# comment="This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue events API", -# ) -# node_id = Column( -# String(), -# comment="This should be renamed to issue_event_src_node_id, as its the varchar identifier in GitHub and likely common in other sources as well. However, since it was created before we came to this naming standard and workers are built around it, we have it simply named as node_id. 
Anywhere you see node_id in the schema, it comes from GitHubs terminology.", -# ) -# node_url = Column(String()) -# platform_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.platform.pltfrm_id", -# name="fkpr_platform", -# ondelete="RESTRICT", -# onupdate="RESTRICT", -# initially="DEFERRED", -# deferrable=True, -# ), -# server_default=text("25150"), -# ) -# pr_platform_event_id = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_events" -# __table_args__ = ( -# PrimaryKeyConstraint("pr_event_id", name="pr_events_pkey"), -# UniqueConstraint( -# "pr_platform_event_id", "platform_id", name="unique-pr-event-id" -# ), -# Index("pr_events_ibfk_1", pull_request_id), -# Index("pr_events_ibfk_2", cntrb_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.files -# class PullRequestFiles(Base): -# pr_file_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_commits_pull_requests_1_copy_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_request_files_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# pr_file_additions = Column(BigInteger) -# pr_file_deletions = Column(BigInteger) -# pr_file_path = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_files" -# __table_args__ = ( -# # TODO: Confirm: Values to determine if insert needed -# UniqueConstraint( -# "pull_request_id", "repo_id", "pr_file_path", name="prfiles_unique" -# ), -# { -# "schema": "augur_data", -# "comment": "Pull request commits are an enumeration of each commit associated with a pull request. \nNot all pull requests are from a branch or fork into master. \nThe commits table intends to count only commits that end up in the master branch (i.e., part of the deployed code base for a project).\nTherefore, there will be commit “SHA”’s in this table that are no associated with a commit SHA in the commits table. \nIn cases where the PR is to the master branch of a project, you will find a match. In cases where the PR does not involve the master branch, you will not find a corresponding commit SHA in the commits table. This is expected. 
", -# }, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.labels - - -# class PullRequestLabels(Base): -# pr_label_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_labels_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_request_labels_repo", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# pr_src_id = Column(BigInteger) -# pr_src_node_id = Column(String()) -# pr_src_url = Column(String()) -# pr_src_description = Column(String()) -# pr_src_color = Column(String()) -# pr_src_default_bool = Column(Boolean()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_labels" -# __table_args__ = ( -# # TODO: Confirm: Values to determine if insert needed -# UniqueConstraint("pr_src_id", "pull_request_id", name="unique-pr-src-label-id"), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.msg_ref - - -# class PullRequestMessageRef(Base): -# pr_msg_ref_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_message_ref_pull_requests_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pr_repo", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# msg_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.message.msg_id", -# name="fk_pull_request_message_ref_message_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# pr_message_ref_src_comment_id = Column(BigInteger) -# pr_message_ref_src_node_id = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# pr_issue_url = Column(String()) - -# message = relationship("Message", back_populates="pull_request") -# pull_request = relationship("PullRequests", back_populates="msg_ref") - -# __tablename__ = "pull_request_message_ref" -# __table_args__ = ( -# # TODO: Confirm: Values to determine if insert needed -# UniqueConstraint( -# "pr_message_ref_src_comment_id", "tool_source", name="pr-comment-nk" -# ), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.meta_data - - -# class PullRequestMeta(Base): -# pr_repo_meta_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_meta_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_request_repo_meta_repo_id", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# 
initially="DEFERRED", -# deferrable=True, -# ), -# ) -# pr_head_or_base = Column( -# String(), -# comment="Each pull request should have one and only one head record; and one and only one base record. ", -# ) -# pr_src_meta_label = Column(String()) -# pr_src_meta_ref = Column(String()) -# pr_sha = Column(String()) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_meta_contributors_2", -# ), -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_meta" -# __table_args__ = ( -# Index("pr_meta-cntrbid-idx", cntrb_id), -# { -# "schema": "augur_data", -# "comment": 'Pull requests contain referencing metadata. There are a few columns that are discrete. There are also head and base designations for the repo on each side of the pull request. Similar functions exist in GitLab, though the language here is based on GitHub. The JSON Being adapted to as of the development of this schema is here: "base": { "label": "chaoss:dev", "ref": "dev", "sha": "dc6c6f3947f7dc84ecba3d8bda641ef786e7027d", "user": { "login": "chaoss", "id": 29740296, "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", "gravatar_id": "", "url": "https://api.github.com/users/chaoss", "html_url": "https://github.com/chaoss", "followers_url": "https://api.github.com/users/chaoss/followers", "following_url": "https://api.github.com/users/chaoss/following{/other_user}", "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", "organizations_url": "https://api.github.com/users/chaoss/orgs", "repos_url": "https://api.github.com/users/chaoss/repos", "events_url": "https://api.github.com/users/chaoss/events{/privacy}", "received_events_url": "https://api.github.com/users/chaoss/received_events", "type": "Organization", "site_admin": false }, "repo": { "id": 78134122, "node_id": "MDEwOlJlcG9zaXRvcnk3ODEzNDEyMg==", "name": "augur", "full_name": "chaoss/augur", "private": false, "owner": { "login": "chaoss", "id": 29740296, "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", "gravatar_id": "", "url": "https://api.github.com/users/chaoss", "html_url": "https://github.com/chaoss", "followers_url": "https://api.github.com/users/chaoss/followers", "following_url": "https://api.github.com/users/chaoss/following{/other_user}", "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", "organizations_url": "https://api.github.com/users/chaoss/orgs", "repos_url": "https://api.github.com/users/chaoss/repos", "events_url": "https://api.github.com/users/chaoss/events{/privacy}", "received_events_url": "https://api.github.com/users/chaoss/received_events", "type": "Organization", "site_admin": false }, ', -# }, -# ) - - -# # TODO: Don't know enough about table structure to create relationship - - -# class PullRequestRepo(Base): -# pr_repo_id = Column(BigInteger, primary_key=True, nullable=False) -# pr_repo_meta_id = Column( -# BigInteger, -# ForeignKey( -# 
"augur_data.pull_request_meta.pr_repo_meta_id", -# name="fk_pull_request_repo_pull_request_meta_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# pr_repo_head_or_base = Column( -# String(), -# comment="For ease of validation checking, we should determine if the repository referenced is the head or base of the pull request. Each pull request should have one and only one of these, which is not enforcable easily in the database.", -# ) -# pr_src_repo_id = Column(BigInteger) -# pr_src_node_id = Column(String()) -# pr_repo_name = Column(String()) -# pr_repo_full_name = Column(String()) -# pr_repo_private_bool = Column(Boolean()) -# pr_cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_repo_contributors_1", -# ), -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_repo" -# __table_args__ = ( -# Index("pr-cntrb-idx-repo", pr_cntrb_id), -# { -# "schema": "augur_data", -# "comment": "This table is for storing information about forks that exist as part of a pull request. Generally we do not want to track these like ordinary repositories. ", -# }, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.reviews.msg_ref - - -# class PullRequestReviewMessageRef(Base): -# pr_review_msg_ref_id = Column(BigInteger, nullable=False) -# pr_review_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_request_reviews.pr_review_id", -# name="fk_pull_request_review_message_ref_pull_request_reviews_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# nullable=False, -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_review_repo", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# ) -# msg_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.message.msg_id", -# name="fk_pull_request_review_message_ref_message_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# nullable=False, -# ) -# pr_review_msg_url = Column(String()) -# pr_review_src_id = Column(BigInteger) -# pr_review_msg_src_id = Column(BigInteger) -# pr_review_msg_node_id = Column(String()) -# pr_review_msg_diff_hunk = Column(String()) -# pr_review_msg_path = Column(String()) -# pr_review_msg_position = Column(BigInteger) -# pr_review_msg_original_position = Column(BigInteger) -# pr_review_msg_commit_id = Column(String()) -# pr_review_msg_original_commit_id = Column(String()) -# pr_review_msg_updated_at = Column(TIMESTAMP()) -# pr_review_msg_html_url = Column(String()) -# pr_url = Column(String()) -# pr_review_msg_author_association = Column(String()) -# pr_review_msg_start_line = Column(BigInteger) -# pr_review_msg_original_start_line = Column(BigInteger) -# pr_review_msg_start_side = Column(String()) -# pr_review_msg_line = Column(BigInteger) -# pr_review_msg_original_line = Column(BigInteger) -# pr_review_msg_side = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# message = relationship("Message", back_populates="pr_review") -# pr_review = 
relationship("PullRequestReviews", back_populates="msg_ref") - -# __tablename__ = "pull_request_review_message_ref" -# __table_args__ = ( -# PrimaryKeyConstraint("pr_review_msg_ref_id", name="pr_review_msg_ref_id"), -# UniqueConstraint("pr_review_msg_src_id", "tool_source", name="pr-review-nk"), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.reviewers -# # TODO: Add cntrb_id relationship (don't understand table well enough) - - -# class PullRequestReviewers(Base): -# pr_reviewer_map_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_reviewers_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# pr_source_id = Column( -# BigInteger, -# comment="The platform ID for the pull/merge request. Used as part of the natural key, along with pr_reviewer_src_id in this table. ", -# ) -# repo_id = Column(BigInteger) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_reviewers_contributors_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# pr_reviewer_src_id = Column( -# BigInteger, -# comment="The platform ID for the pull/merge request reviewer. Used as part of the natural key, along with pr_source_id in this table. ", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_reviewers" -# __table_args__ = ( -# UniqueConstraint( -# "pr_source_id", -# "pr_reviewer_src_id", -# name="unique_pr_src_reviewer_key", -# initially="DEFERRED", -# deferrable=True, -# ), -# Index("pr-reviewers-cntrb-idx1", cntrb_id), -# {"schema": "augur_data"}, -# ) - - -# # TODO: I don't think repo_id is needed on this table because it can be achieved by doing Repo.PullRequests.reviews -# # TODO: Add relationship for cntrb_id - - -# class PullRequestReviews(Base): -# pr_review_id = Column(BigInteger, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_reviews_pull_requests_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_repo_review", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# ) -# cntrb_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pull_request_reviews_contributors_1", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# nullable=False, -# ) -# pr_review_author_association = Column(String()) -# pr_review_state = Column(String()) -# pr_review_body = Column(String()) -# pr_review_submitted_at = Column(TIMESTAMP()) -# pr_review_src_id = Column(BigInteger) -# pr_review_node_id = Column(String()) -# pr_review_html_url = Column(String()) -# pr_review_pull_request_url = Column(String()) -# pr_review_commit_id = Column(String()) -# platform_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.platform.pltfrm_id", -# name="fk-review-platform", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# initially="DEFERRED", -# deferrable=True, -# ), -# server_default=text("25150"), -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# 
data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# msg_ref = relationship("PullRequestReviewMessageRef", back_populates="pr_review") - -# def get_messages(self): - -# messages = [] -# for msg_ref in self.msg_ref: -# messages.append(msg_ref.message) - -# return messages - -# __tablename__ = "pull_request_reviews" -# __table_args__ = ( -# PrimaryKeyConstraint("pr_review_id", name="pull_request_review_id"), -# UniqueConstraint("pr_review_src_id", "tool_source", name="sourcepr-review-id"), -# {"schema": "augur_data"}, -# ) - - -# class PullRequestTeams(Base): -# pr_team_id = Column(BigInteger, primary_key=True, nullable=False) -# pull_request_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.pull_requests.pull_request_id", -# name="fk_pull_request_teams_pull_requests_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# pr_src_team_id = Column(BigInteger) -# pr_src_team_node = Column(String()) -# pr_src_team_url = Column(String()) -# pr_team_name = Column(String()) -# pr_team_slug = Column(String()) -# pr_team_description = Column(String()) -# pr_team_privacy = Column(String()) -# pr_team_permission = Column(String()) -# pr_team_src_members_url = Column(String()) -# pr_team_src_repositories_url = Column(String()) -# pr_team_parent_id = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "pull_request_teams" -# __table_args__ = {"schema": "augur_data"} - - -# class PullRequests(Base): -# pull_request_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="fk_pull_requests_repo_1", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# server_default=text("0"), -# ) -# pr_url = Column(String()) -# pr_src_id = Column( -# BigInteger, comment="The pr_src_id is unique across all of github." -# ) -# pr_src_node_id = Column(String()) -# pr_html_url = Column(String()) -# pr_diff_url = Column(String()) -# pr_patch_url = Column(String()) -# pr_issue_url = Column(String()) -# pr_augur_issue_id = Column( -# BigInteger, comment="This is to link to the augur stored related issue" -# ) -# pr_src_number = Column( -# BigInteger, comment="The pr_src_number is unique within a repository." -# ) -# pr_src_state = Column(String()) -# pr_src_locked = Column(Boolean()) -# pr_src_title = Column(String()) -# pr_augur_contributor_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.contributors.cntrb_id", -# name="fk_pr_contribs", -# ondelete="RESTRICT", -# onupdate="CASCADE", -# ), -# comment="This is to link to the augur contributor record. ", -# ) -# pr_body = Column(Text()) -# pr_created_at = Column(TIMESTAMP()) -# pr_updated_at = Column(TIMESTAMP()) -# pr_closed_at = Column(TIMESTAMP()) -# pr_merged_at = Column(TIMESTAMP()) -# pr_merge_commit_sha = Column(String()) -# pr_teams = Column(BigInteger, comment="One to many with pull request teams. ") -# pr_milestone = Column(String()) -# pr_commits_url = Column(String()) -# pr_review_comments_url = Column(String()) -# pr_review_comment_url = Column( -# String(), -# comment="This is a field with limited utility. It does expose how to access a specific comment if needed with parameters. 
If the source changes URL structure, it may be useful", -# ) -# pr_comments_url = Column(String()) -# pr_statuses_url = Column(String()) -# pr_meta_head_id = Column( -# String(), -# comment="The metadata for the head repo that links to the pull_request_meta table. ", -# ) -# pr_meta_base_id = Column( -# String(), -# comment="The metadata for the base repo that links to the pull_request_meta table. ", -# ) -# pr_src_issue_url = Column(String()) -# pr_src_comments_url = Column(String()) -# pr_src_review_comments_url = Column(String()) -# pr_src_commits_url = Column(String()) -# pr_src_statuses_url = Column(String()) -# pr_src_author_association = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# analysis = relationship("PullRequestAnalysis", back_populates="pull_request") -# assignees = relationship("PullRequestAssignees") -# commits = relationship("PullRequestCommits") -# events = relationship("PullRequestEvents") -# files = relationship("PullRequestFiles") -# labels = relationship("PullRequestLabels") -# msg_ref = relationship("PullRequestMessageRef", back_populates="pull_request") -# meta_data = relationship("PullRequestMeta") -# reviewers = relationship("PullRequestReviewers") -# reviews = relationship("PullRequestReviews") -# teams = relationship("PullRequestTeams") - -# def get_messages(self): - -# messages = [] -# for msg_ref in self.msg_ref: -# messages.append(msg_ref.message) - -# return messages - -# __tablename__ = "pull_requests" -# __table_args__ = ( -# Index( -# "id_node", pr_src_id.desc().nullsfirst(), pr_src_node_id.desc().nullsfirst() -# ), -# Index("pull_requests_idx_repo_id_data_datex", repo_id, data_collection_date), -# {"schema": "augur_data"}, -# ) - - -# class Releases(Base): -# release_id = Column(CHAR(length=64), primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_releases_repo_1"), -# nullable=False, -# ) -# release_name = Column(String()) -# release_description = Column(String()) -# release_author = Column(String()) -# release_created_at = Column(TIMESTAMP()) -# release_published_at = Column(TIMESTAMP()) -# release_updated_at = Column(TIMESTAMP()) -# release_is_draft = Column(Boolean()) -# release_is_prerelease = Column(Boolean()) -# release_tag_name = Column(String()) -# release_url = Column(String()) -# tag_only = Column(Boolean()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "releases" -# __table_args__ = {"schema": "augur_data"} - - -# class Repo(Base): -# repo_id = Column(BigInteger, nullable=False) -# repo_group_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo_groups.repo_group_id", name="fk_repo_repo_groups_1" -# ), -# nullable=False, -# ) -# repo_git = Column(String(), nullable=False) -# repo_path = Column(String(), server_default="NULL") -# repo_name = Column(String(), server_default="NULL") -# repo_added = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# repo_status = Column(String(), nullable=False, server_default="New") -# repo_type = Column( -# String(), -# server_default="", -# comment='This field is intended to indicate if the repository is the "main instance" of a repository in cases where implementations 
choose to add the same repository to more than one repository group. In cases where the repository group is of rg_type Github Organization then this repo_type should be "primary". In other cases the repo_type should probably be "user created". We made this a varchar in order to hold open the possibility that there are additional repo_types we have not thought about. ', -# ) -# url = Column(String()) -# owner_id = Column(Integer) -# description = Column(String()) -# primary_language = Column(String()) -# created_at = Column(String()) -# forked_from = Column(String()) -# updated_at = Column(TIMESTAMP()) -# repo_archived_date_collected = Column(TIMESTAMP(timezone=True)) -# repo_archived = Column(Integer) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# commits = relationship("Commits") -# issues = relationship("Issues") -# pull_requests = relationship("PullRequests") -# libraries = relationship("Libraries") -# messages = relationship("Message") - -# pr_assignees = relationship("PullRequestAssignees") -# pr_commits = relationship("PullRequestCommits") -# pr_events = relationship("PullRequestEvents") -# pr_files = relationship("PullRequestFiles") -# pr_labels = relationship("PullRequestLabels") -# pr_meta_data = relationship("PullRequestMeta") -# pr_reviews = relationship("PullRequestReviews") - -# msg_analysis_summary = relationship("MessageAnalysisSummary", back_populates="repo") -# msg_sentiment_summary = relationship( -# "MessageSentimentSummary", back_populates="repo" -# ) - -# lstm_anomaly_results = relationship("LstmAnomalyResults") - -# releases = relationship("Releases") -# badges = relationship("RepoBadging") -# cluster_messages = relationship("RepoClusterMessages") -# dependencies = relationship("RepoDependencies") -# deps_libyear = relationship("RepoDepsLibyear") -# deps_scorecard_id = relationship("RepoDepsScorecard") - -# info = relationship("RepoInfo") -# insights = relationship("RepoInsights") -# insight_records = relationship("RepoInsightsRecords") - -# labor = relationship("RepoLabor") -# meta_data = relationship("RepoMeta") -# sbom_scans = relationship("RepoSbomScans") -# stats = relationship("RepoStats") -# topic = relationship("RepoTopic") - -# __tablename__ = "repo" -# __table_args__ = ( -# PrimaryKeyConstraint("repo_id", name="repounique"), -# Index("forked", forked_from), -# Index("repo_idx_repo_id_repo_namex", repo_id, repo_name), -# Index("repogitindexrep", repo_git), -# Index("reponameindex", repo_name, postgresql_using="hash"), -# Index("reponameindexbtree", repo_name), -# Index("rggrouponrepoindex", repo_group_id), -# Index("therepo", repo_id, unique=True), -# { -# "schema": "augur_data", -# "comment": "This table is a combination of the columns in Facade’s repo table and GHTorrent’s projects table. 
", -# }, -# ) - - -# class RepoBadging(Base): -# badge_collection_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_badging_repo_1"), -# ) -# created_at = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# data = Column(JSONB()) - -# __tablename__ = "repo_badging" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "This will be collected from the LF’s Badging API\nhttps://bestpractices.coreinfrastructure.org/projects.json?pq=https%3A%2F%2Fgithub.com%2Fchaoss%2Faugur\n", -# } - - -# class RepoClusterMessages(Base): -# msg_cluster_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_cluster_messages_repo_1"), -# ) -# cluster_content = Column(Integer) -# cluster_mechanism = Column(Integer) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_cluster_messages" -# __table_args__ = {"schema": "augur_data"} - - -# class RepoDependencies(Base): -# repo_dependencies_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="repo_id"), -# comment="Forign key for repo id. ", -# ) -# dep_name = Column(String(), comment="Name of the dependancy found in project. ") -# dep_count = Column(Integer, comment="Number of times the dependancy was found. ") -# dep_language = Column(String(), comment="Language of the dependancy. 
") -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_dependencies" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "Contains the dependencies for a repo.", -# } - - -# # TODO: typo in field current_verion -# class RepoDepsLibyear(Base): -# repo_deps_libyear_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, ForeignKey("augur_data.repo.repo_id", name="repo_id_copy_2") -# ) -# name = Column(String()) -# requirement = Column(String()) -# type = Column(String()) -# package_manager = Column(String()) -# current_verion = Column(String()) -# latest_version = Column(String()) -# current_release_date = Column(String()) -# latest_release_date = Column(String()) -# libyear = Column(Float()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_deps_libyear" -# __table_args__ = {"schema": "augur_data"} - - -# class RepoDepsScorecard(Base): -# repo_deps_scorecard_id = Column(BigInteger, nullable=False) -# repo_id = Column( -# BigInteger, ForeignKey("augur_data.repo.repo_id", name="repo_id_copy_1") -# ) -# name = Column(String()) -# status = Column(String()) -# score = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_deps_scorecard" -# __table_args__ = ( -# PrimaryKeyConstraint( -# "repo_deps_scorecard_id", name="repo_deps_scorecard_pkey1" -# ), -# {"schema": "augur_data"}, -# ) - - -# class RepoGroupInsights(Base): -# rgi_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_group_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo_groups.repo_group_id", -# name="fk_repo_group_insights_repo_groups_1", -# ), -# ) -# rgi_metric = Column(String()) -# rgi_value = Column(String()) -# cms_id = Column(BigInteger) -# rgi_fresh = Column( -# Boolean(), -# comment='false if the date is before the statistic that triggered the insight, true if after. This allows us to automatically display only "fresh insights" and avoid displaying "stale insights". The insight worker will populate this table. ', -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# repo_group = relationship("RepoGroups") - -# __tablename__ = "repo_group_insights" -# __table_args__ = { -# "schema": "augur_data", -# "comment": 'This table is output from an analytical worker inside of Augur. It runs through the different metrics on a REPOSITORY_GROUP and identifies the five to ten most “interesting” metrics as defined by some kind of delta or other factor. The algorithm is going to evolve. \n\nWorker Design Notes: The idea is that the "insight worker" will scan through a bunch of active metrics or "synthetic metrics" to list the most important insights. 
', -# } - - -# class RepoGroups(Base): -# repo_group_id = Column(BigInteger, nullable=False) -# rg_name = Column(String(), nullable=False) -# rg_description = Column(String(), server_default="NULL") -# rg_website = Column(String(), server_default="NULL") -# rg_recache = Column(SmallInteger, server_default=text("1")) -# rg_last_modified = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) -# rg_type = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# repos = relationship("Repo") -# rg_list_serve = relationship("RepoGroupsListServe") - -# __tablename__ = "repo_groups" -# __table_args__ = ( -# PrimaryKeyConstraint("repo_group_id", name="rgid"), -# Index("rgidm", repo_group_id, unique=True), -# Index("rgnameindex", rg_name), -# { -# "schema": "augur_data", -# "comment": "rg_type is intended to be either a GitHub Organization or a User Created Repo Group. ", -# }, -# ) - - -# class RepoGroupsListServe(Base): -# rgls_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_group_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo_groups.repo_group_id", -# name="fk_repo_groups_list_serve_repo_groups_1", -# ), -# nullable=False, -# ) -# rgls_name = Column(String()) -# rgls_description = Column(String()) -# rgls_sponsor = Column(String()) -# rgls_email = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_groups_list_serve" -# __table_args__ = ( -# UniqueConstraint("rgls_id", "repo_group_id", name="rglistserve"), -# Index("lister", rgls_id, repo_group_id, unique=True), -# {"schema": "augur_data"}, -# ) - - -# class RepoInfo(Base): -# repo_info_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_info_repo_1"), -# nullable=False, -# ) -# last_updated = Column(TIMESTAMP()) -# issues_enabled = Column(String()) -# open_issues = Column(Integer) -# pull_requests_enabled = Column(String()) -# wiki_enabled = Column(String()) -# pages_enabled = Column(String()) -# fork_count = Column(Integer) -# default_branch = Column(String()) -# watchers_count = Column(Integer) -# UUID = Column(Integer) -# license = Column(String()) -# stars_count = Column(Integer) -# committers_count = Column(Integer) -# issue_contributors_count = Column(String()) -# changelog_file = Column(String()) -# contributing_file = Column(String()) -# license_file = Column(String()) -# code_of_conduct_file = Column(String()) -# security_issue_file = Column(String()) -# security_audit_file = Column(String()) -# status = Column(String()) -# keywords = Column(String()) -# commit_count = Column(BigInteger) -# issues_count = Column(BigInteger) -# issues_closed = Column(BigInteger) -# pull_request_count = Column(BigInteger) -# pull_requests_open = Column(BigInteger) -# pull_requests_closed = Column(BigInteger) -# pull_requests_merged = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_info" -# __table_args__ = ( -# # TODO: Their appears to be two of the same index in current database 
-# Index("repo_info_idx_repo_id_data_date_1x", repo_id, data_collection_date), -# {"schema": "augur_data"}, -# ) - - -# # TODO: Why is numeric defined without level or precision? -# class RepoInsights(Base): -# ri_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_insights_repo_1"), -# ) -# ri_metric = Column(String()) -# ri_value = Column(String()) -# ri_date = Column(TIMESTAMP()) -# ri_fresh = Column( -# Boolean(), -# comment='false if the date is before the statistic that triggered the insight, true if after. This allows us to automatically display only "fresh insights" and avoid displaying "stale insights". The insight worker will populate this table. ', -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) -# ri_score = Column(Numeric()) -# ri_field = Column(String()) -# ri_detection_method = Column(String()) - -# __tablename__ = "repo_insights" -# __table_args__ = { -# "schema": "augur_data", -# "comment": 'This table is output from an analytical worker inside of Augur. It runs through the different metrics on a repository and identifies the five to ten most “interesting” metrics as defined by some kind of delta or other factor. The algorithm is going to evolve. \n\nWorker Design Notes: The idea is that the "insight worker" will scan through a bunch of active metrics or "synthetic metrics" to list the most important insights. ', -# } - - -# class RepoInsightsRecords(Base): -# ri_id = Column( -# BigInteger, primary_key=True, nullable=False, comment="Primary key. " -# ) -# repo_id = Column( -# BigInteger, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="repo_id_ref", -# ondelete="SET NULL", -# onupdate="CASCADE", -# ), -# comment="Refers to repo table primary key. Will have a foreign key", -# ) -# ri_metric = Column(String(), comment="The metric endpoint") -# ri_field = Column(String(), comment="The field in the metric endpoint") -# ri_value = Column(String(), comment="The value of the endpoint in ri_field") -# ri_date = Column( -# TIMESTAMP(), -# comment="The date the insight is for; in other words, some anomaly occurred on this date. ", -# ) -# ri_score = Column(Float(), comment="A Score, derived from the algorithm used. ") -# ri_detection_method = Column( -# String(), -# comment='A confidence interval or other expression of the type of threshold and the value of a threshold met in order for it to be "an insight". Example. "95% confidence interval". 
', -# ) -# tool_source = Column(String(), comment="Standard Augur Metadata") -# tool_version = Column(String(), comment="Standard Augur Metadata") -# data_source = Column(String(), comment="Standard Augur Metadata") -# data_collection_date = Column( -# TIMESTAMP(), -# server_default=func.current_timestamp(), -# comment="Standard Augur Metadata", -# ) - -# __tablename__ = "repo_insights_records" -# __table_args__ = (Index("dater", ri_date), {"schema": "augur_data"}) - - -# class RepoLabor(Base): -# repo_labor_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_labor_repo_1"), -# ) -# repo_clone_date = Column(TIMESTAMP()) -# rl_analysis_date = Column(TIMESTAMP()) -# programming_language = Column(String()) -# file_path = Column(String()) -# file_name = Column(String()) -# total_lines = Column(Integer) -# code_lines = Column(Integer) -# comment_lines = Column(Integer) -# blank_lines = Column(Integer) -# code_complexity = Column(Integer) -# repo_url = Column( -# String(), -# comment="This is a convenience column to simplify analysis against external datasets", -# ) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_labor" -# __table_args__ = { -# "schema": "augur_data", -# "comment": "repo_labor is a derivative of tables used to store scc code and complexity counting statistics that are inputs to labor analysis, which are components of CHAOSS value metric calculations. ", -# } - - -# class RepoMeta(Base): -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_meta_repo_1"), -# primary_key=True, -# nullable=False, -# ) -# rmeta_id = Column(BigInteger, primary_key=True, nullable=False) -# rmeta_name = Column(String()) -# rmeta_value = Column(String(), server_default=text("0")) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_meta" -# __table_args__ = {"schema": "augur_data", "comment": "Project Languages"} - - -# class RepoSbomScans(Base): -# rsb_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# Integer, -# ForeignKey( -# "augur_data.repo.repo_id", -# name="repo_linker_sbom", -# ondelete="CASCADE", -# onupdate="CASCADE", -# ), -# ) -# sbom_scan = Column(JSON()) - -# __tablename__ = "repo_sbom_scans" -# __table_args__ = {"schema": "augur_data"} - - -# class RepoStats(Base): -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_stats_repo_1"), -# primary_key=True, -# nullable=False, -# ) -# rstat_id = Column(BigInteger, primary_key=True, nullable=False) -# rstat_name = Column(String()) -# rstat_value = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_stats" -# __table_args__ = {"schema": "augur_data", "comment": "Project Watchers"} - - -# class RepoTestCoverage(Base): -# repo_id = Column( -# BigInteger, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_test_coverage_repo_1"), -# primary_key=True, -# nullable=False, -# ) -# repo_clone_date = Column(TIMESTAMP()) -# rtc_analysis_date = 
Column(TIMESTAMP()) -# programming_language = Column(String()) -# file_path = Column(String()) -# file_name = Column(String()) -# testing_tool = Column(String()) -# file_statement_count = Column(BigInteger) -# file_subroutine_count = Column(BigInteger) -# file_statements_tested = Column(BigInteger) -# file_subroutines_tested = Column(BigInteger) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_test_coverage" -# __table_args__ = {"schema": "augur_data"} - - -# class RepoTopic(Base): -# repo_topic_id = Column(BigInteger, primary_key=True, nullable=False) -# repo_id = Column( -# Integer, -# ForeignKey("augur_data.repo.repo_id", name="fk_repo_topic_repo_1"), -# ) -# topic_id = Column(Integer) -# topic_prob = Column(Float()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "repo_topic" -# __table_args__ = {"schema": "augur_data"} - - -# # TODO: Add foreign key to repo table - - -# class ReposFetchLog(Base): -# repos_fetch_log_id = Column(BigInteger, primary_key=True) -# repos_id = Column(Integer, nullable=False) -# status = Column(String(), nullable=False) -# date = Column(TIMESTAMP(), nullable=False, server_default=func.current_timestamp()) - -# __tablename__ = "repos_fetch_log" -# __table_args__ = ( -# # TODO: There appear to be two identical indexes -# Index("repos_id,status", repos_id, status), -# {"schema": "augur_data"}, -# ) - - -# class Settings(Base): -# id = Column(Integer, primary_key=True, nullable=False) -# setting = Column(String(), nullable=False) -# value = Column(String(), nullable=False) -# last_modified = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) - -# __tablename__ = "settings" -# __table_args__ = {"schema": "augur_data"} - - -# class TopicWords(Base): -# topic_words_id = Column(BigInteger, primary_key=True, nullable=False) -# topic_id = Column(BigInteger) -# word = Column(String()) -# word_prob = Column(Float()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "topic_words" -# __table_args__ = {"schema": "augur_data"} - - -# # TODO: Add foreign key to repo_group table - - -# class UnknownCache(Base): -# unknown_cache_id = Column(BigInteger, primary_key=True) -# type = Column(String(), nullable=False) -# repo_group_id = Column(Integer, nullable=False) -# email = Column(String(), nullable=False) -# domain = Column(String(), server_default="NULL") -# added = Column(BigInteger, nullable=False) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "unknown_cache" -# __table_args__ = ( -# Index("type,projects_id", type, repo_group_id), -# {"schema": "augur_data"}, -# ) - - -# class UnresolvedCommitEmails(Base): -# email_unresolved_id = Column(BigInteger, primary_key=True, nullable=False) -# email = Column(String(), nullable=False) -# name = Column(String()) -# tool_source = Column(String()) -# tool_version = Column(String()) -# data_source = Column(String()) -# data_collection_date = 
Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "unresolved_commit_emails" -# __table_args__ = ( -# UniqueConstraint("email", name="unresolved_commit_emails_email_key"), -# {"schema": "augur_data"}, -# ) - - -# class UtilityLog(Base): -# id = Column(BigInteger, primary_key=True, nullable=False) -# level = Column(String(), nullable=False) -# status = Column(String(), nullable=False) -# attempted = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) - -# __tablename__ = "utility_log" -# __table_args__ = {"schema": "augur_data"} - - -# # TODO: Add foreign key to repo table - - -# class WorkingCommits(Base): -# working_commits_id = Column(BigInteger, primary_key=True) -# repos_id = Column(Integer, nullable=False) -# working_commit = Column(String(), server_default="NULL") - -# __tablename__ = "working_commits" -# __table_args__ = {"schema": "augur_data"} - - -# # class WorkingCommits(Base): -# # working_commits_id = Column(BigInteger) -# # repos_id = Column(Integer, nullable=False) -# # working_commit = Column(String()) - -# # __tablename__ = 'working_commits' -# # __table_args__ = ( -# # PrimaryKeyConstraint('working_commits_id'), -# # {"schema":"augur_operations"} -# # ) diff --git a/augur/application/db/models/augur_operations_old.py b/augur/application/db/models/augur_operations_old.py deleted file mode 100644 index 898b6f77f2..0000000000 --- a/augur/application/db/models/augur_operations_old.py +++ /dev/null @@ -1,123 +0,0 @@ -# from augur.application.db.models.base import Base -# from sqlalchemy import ( -# Index, -# Column, -# Integer, -# String, -# UniqueConstraint, -# BigInteger, -# TIMESTAMP, -# PrimaryKeyConstraint, -# func, -# text, -# ) - -# # Start of Augur Operations tablespoon -# class All(Base): -# all_id = Column(BigInteger, primary_key=True) -# Name = Column(String()) -# Bytes = Column(String()) -# Lines = Column(String()) -# Code = Column(String()) -# Comment = Column(String()) -# Blank = Column(String()) -# Complexity = Column(String()) -# Count = Column(String()) -# WeightedComplexity = Column(String()) -# Files = Column(String()) - -# __tablename__ = "all" -# __table_args__ = {"schema": "augur_operations"} - - -# class AugurSettings(Base): -# id = Column(BigInteger) -# setting = Column(String()) -# value = Column(String()) -# last_modified = Column(TIMESTAMP(), server_default=func.current_timestamp()) - -# __tablename__ = "augur_settings" -# __table_args__ = ( -# PrimaryKeyConstraint("id"), -# UniqueConstraint("setting", name="setting-unique"), -# {"schema": "augur_operations"}, -# ) - - -# class ReposFetchLog(Base): -# repos_fetch_log_id = Column(BigInteger) -# repos_id = Column(Integer, nullable=False) -# status = Column(String(), nullable=False) -# date = Column(TIMESTAMP(), nullable=False, server_default=func.current_timestamp()) - -# __tablename__ = "repos_fetch_log" -# __table_args__ = ( -# PrimaryKeyConstraint("repos_fetch_log_id"), -# Index("repos_id,statusops", repos_id, status), -# {"schema": "augur_operations"}, -# ) - - -# # TODO: Add foreign key to Repo table -# class WorkerHistory(Base): -# history_id = Column(BigInteger) -# repo_id = Column(BigInteger) -# worker = Column(String(), nullable=False) -# job_model = Column(String(), nullable=False) -# oauth_id = Column(Integer) -# timestamp = Column(TIMESTAMP(), nullable=False) -# status = Column(String(), nullable=False) -# total_results = Column(Integer) - -# __tablename__ = "worker_history" -# __table_args__ = ( -# 
PrimaryKeyConstraint("history_id", name="history_pkey"), -# {"schema": "augur_operations"}, -# ) - - -# class WorkerJob(Base): -# job_model = Column(String()) -# state = Column(Integer, nullable=False, server_default=text("0")) -# zombie_head = Column(Integer) -# since_id_str = Column(String(), nullable=False, server_default="0") -# description = Column(String(), server_default="None") -# last_count = Column(Integer) -# last_run = Column(TIMESTAMP()) -# analysis_state = Column(Integer, server_default=text("0")) -# oauth_id = Column(Integer, nullable=False) - -# __tablename__ = "worker_job" -# __table_args__ = ( -# PrimaryKeyConstraint("job_model", name="job_pkey"), -# {"schema": "augur_operations"}, -# ) - - -# class WorkerOauth(Base): -# oauth_id = Column(BigInteger) -# name = Column(String(), nullable=False) -# consumer_key = Column(String(), nullable=False) -# consumer_secret = Column(String(), nullable=False) -# access_token = Column(String(), nullable=False) -# access_token_secret = Column(String(), nullable=False) -# repo_directory = Column(String()) -# platform = Column(String(), server_default="github") - -# __tablename__ = "worker_oauth" -# __table_args__ = (PrimaryKeyConstraint("oauth_id"), {"schema": "augur_operations"}) - - -# class WorkerSettingsFacade(Base): -# id = Column(Integer) -# setting = Column(String(), nullable=False) -# value = Column(String(), nullable=False) -# last_modified = Column( -# TIMESTAMP(), nullable=False, server_default=func.current_timestamp() -# ) - -# __tablename__ = "worker_settings_facade" -# __table_args__ = ( -# PrimaryKeyConstraint("id", name="settings_pkey"), -# {"schema": "augur_operations"}, -# ) diff --git a/augur/application/db/models/spdx_old.py b/augur/application/db/models/spdx_old.py deleted file mode 100644 index 7c5ffdcb5c..0000000000 --- a/augur/application/db/models/spdx_old.py +++ /dev/null @@ -1,525 +0,0 @@ -# from augur.application.db.models.base import Base -# from sqlalchemy import ( -# Column, -# Integer, -# String, -# UniqueConstraint, -# PrimaryKeyConstraint, -# ForeignKey, -# Text, -# Boolean, -# TIMESTAMP, -# JSON, -# ) - - -# class AnnotationTypes(Base): -# annotation_type_id = Column(Integer, primary_key=True) -# name = Column(String(), nullable=False) - -# __tablename__ = "annotation_types" -# __table_args__ = ( -# UniqueConstraint("name", name="uc_annotation_type_name"), -# {"schema": "spdx"}, -# ) - - -# class Annotations(Base): -# annotation_id = Column(Integer, primary_key=True, nullable=False) -# document_id = Column( -# Integer, -# ForeignKey("spdx.documents.document_id", name="annotations_document_id_fkey"), -# nullable=False, -# ) -# annotation_type_id = Column( -# Integer, -# ForeignKey( -# "spdx.annotation_types.annotation_type_id", -# name="annotations_annotation_type_id_fkey", -# ), -# nullable=False, -# ) -# identifier_id = Column( -# Integer, -# ForeignKey( -# "spdx.identifiers.identifier_id", name="annotations_identifier_id_fkey" -# ), -# nullable=False, -# ) -# creator_id = Column( -# Integer, -# ForeignKey("spdx.creators.creator_id", name="annotations_creator_id_fkey"), -# nullable=False, -# ) -# created_ts = Column(TIMESTAMP(timezone=True)) -# comment = Column(Text(), nullable=False) - -# __tablename__ = "annotations" -# __table_args__ = {"schema": "spdx"} - - -# class AugurRepoMap(Base): -# map_id = Column(Integer, primary_key=True, nullable=False) -# dosocs_pkg_id = Column(Integer) -# dosocs_pkg_name = Column(Text()) -# repo_id = Column(Integer) -# repo_path = Column(Text()) - -# __tablename__ = 
"augur_repo_map" -# __table_args__ = {"schema": "spdx"} - - -# class CreatorTypes(Base): -# creator_type_id = Column(Integer, primary_key=True, nullable=False) -# name = Column(String(), nullable=False) - -# __tablename__ = "creator_types" -# __table_args__ = {"schema": "spdx"} - - -# class Creators(Base): -# creator_id = Column(Integer, primary_key=True, nullable=False) -# creator_type_id = Column( -# Integer, -# ForeignKey( -# "spdx.creator_types.creator_type_id", name="creators_creator_type_id_fkey" -# ), -# nullable=False, -# ) -# name = Column(String(), nullable=False) -# email = Column(String(), nullable=False) - -# __tablename__ = "creators" -# __table_args__ = {"schema": "spdx"} - - -# class DocumentNamespaces(Base): -# document_namespace_id = Column(Integer, primary_key=True, nullable=False) -# uri = Column(String(), nullable=False) - -# __tablename__ = "document_namespaces" -# __table_args__ = ( -# UniqueConstraint("uri", name="uc_document_namespace_uri"), -# {"schema": "spdx"}, -# ) - - -# class Documents(Base): -# document_id = Column(Integer, primary_key=True, nullable=False) -# document_namespace_id = Column( -# Integer, -# ForeignKey( -# "spdx.document_namespaces.document_namespace_id", -# name="documents_document_namespace_id_fkey", -# ), -# nullable=False, -# ) -# data_license_id = Column( -# Integer, -# ForeignKey("spdx.licenses.license_id", name="documents_data_license_id_fkey"), -# nullable=False, -# ) -# spdx_version = Column(String(), nullable=False) -# name = Column(String(), nullable=False) -# license_list_version = Column(String(), nullable=False) -# created_ts = Column(TIMESTAMP(timezone=True), nullable=False) -# creator_comment = Column(Text(), nullable=False) -# document_comment = Column(Text(), nullable=False) -# package_id = Column( -# Integer, -# ForeignKey("spdx.packages.package_id", name="documents_package_id_fkey"), -# nullable=False, -# ) - -# __tablename__ = "documents" -# __table_args__ = ( -# UniqueConstraint( -# "document_namespace_id", name="uc_document_document_namespace_id" -# ), -# {"schema": "spdx"}, -# ) - - -# class DocumentsCreators(Base): -# document_creator_id = Column(Integer, primary_key=True, nullable=False) -# document_id = Column( -# Integer, -# ForeignKey( -# "spdx.documents.document_id", name="documents_creators_document_id_fkey" -# ), -# nullable=False, -# ) -# creator_id = Column( -# Integer, -# ForeignKey( -# "spdx.creators.creator_id", name="documents_creators_creator_id_fkey" -# ), -# nullable=False, -# ) - -# __tablename__ = "documents_creators" -# __table_args__ = {"schema": "spdx"} - - -# class ExternalRefs(Base): -# external_ref_id = Column(Integer, primary_key=True, nullable=False) -# document_id = Column( -# Integer, -# ForeignKey("spdx.documents.document_id", name="external_refs_document_id_fkey"), -# nullable=False, -# ) -# document_namespace_id = Column( -# Integer, -# ForeignKey( -# "spdx.document_namespaces.document_namespace_id", -# name="external_refs_document_namespace_id_fkey", -# ), -# nullable=False, -# ) -# id_string = Column(String(), nullable=False) -# sha256 = Column(String(), nullable=False) - -# __tablename__ = "external_refs" -# __table_args__ = ( -# UniqueConstraint( -# "document_id", "id_string", name="uc_external_ref_document_id_string" -# ), -# {"schema": "spdx"}, -# ) - - -# class FileContributors(Base): -# file_contributor_id = Column(Integer, primary_key=True, nullable=False) -# file_id = Column( -# Integer, -# ForeignKey("spdx.files.file_id", name="file_contributors_file_id_fkey"), -# 
nullable=False, -# ) -# contributor = Column(Text(), nullable=False) - -# __tablename__ = "file_contributors" -# __table_args__ = {"schema": "spdx"} - - -# class FileTypes(Base): -# file_type_id = Column(Integer) -# name = Column(String(), nullable=False) - -# __tablename__ = "file_types" -# __table_args__ = ( -# PrimaryKeyConstraint("name", name="uc_file_type_name"), -# {"schema": "spdx"}, -# ) - - -# class Files(Base): -# file_id = Column(Integer, primary_key=True, nullable=False) -# file_type_id = Column(Integer) -# sha256 = Column(String(), nullable=False) -# copyright_text = Column(Text()) -# package_id = Column(Integer) -# comment = Column(Text(), nullable=False) -# notice = Column(Text(), nullable=False) - -# __tablename__ = "files" -# __table_args__ = ( -# UniqueConstraint("sha256", name="uc_file_sha256"), -# {"schema": "spdx"}, -# ) - - -# class FilesLicenses(Base): -# file_license_id = Column(Integer, primary_key=True, nullable=False) -# file_id = Column( -# Integer, -# ForeignKey("spdx.files.file_id", name="files_licenses_file_id_fkey"), -# nullable=False, -# ) -# license_id = Column( -# Integer, -# ForeignKey("spdx.licenses.license_id", name="files_licenses_license_id_fkey"), -# nullable=False, -# ) -# extracted_text = Column(Text(), nullable=False) - -# __tablename__ = "files_licenses" -# __table_args__ = ( -# UniqueConstraint("file_id", "license_id", name="uc_file_license"), -# {"schema": "spdx"}, -# ) - - -# class FilesScans(Base): -# file_scan_id = Column(Integer, primary_key=True, nullable=False) -# file_id = Column( -# Integer, -# ForeignKey("spdx.files.file_id", name="files_scans_file_id_fkey"), -# nullable=False, -# ) -# scanner_id = Column( -# Integer, -# ForeignKey("spdx.scanners.scanner_id", name="files_scans_scanner_id_fkey"), -# nullable=False, -# ) - -# __tablename__ = "files_scans" -# __table_args__ = ( -# UniqueConstraint("file_id", "scanner_id", name="uc_file_scanner_id"), -# {"schema": "spdx"}, -# ) - - -# # TODO: Add check to table - - -# class Identifiers(Base): -# identifier_id = Column(Integer, primary_key=True, nullable=False) -# document_namespace_id = Column( -# Integer, -# ForeignKey( -# "spdx.document_namespaces.document_namespace_id", -# name="identifiers_document_namespace_id_fkey", -# ), -# nullable=False, -# ) -# id_string = Column(String(), nullable=False) -# document_id = Column( -# Integer, -# ForeignKey("spdx.documents.document_id", name="identifiers_document_id_fkey"), -# ) -# package_id = Column( -# Integer, -# ForeignKey("spdx.packages.package_id", name="identifiers_package_id_fkey"), -# ) -# package_file_id = Column( -# Integer, -# ForeignKey( -# "spdx.packages_files.package_file_id", -# name="identifiers_package_file_id_fkey", -# ), -# ) - -# __tablename__ = "identifiers" -# __table_args__ = ( -# UniqueConstraint( -# "document_namespace_id", -# "id_string", -# name="uc_identifier_document_namespace_id", -# ), -# UniqueConstraint( -# "document_namespace_id", -# "document_id", -# name="uc_identifier_namespace_document_id", -# ), -# UniqueConstraint( -# "document_namespace_id", -# "package_id", -# name="uc_identifier_namespace_package_id", -# ), -# UniqueConstraint( -# "document_namespace_id", -# "package_file_id", -# name="uc_identifier_namespace_package_file_id", -# ), -# {"schema": "spdx"}, -# ) - - -# class Licenses(Base): -# license_id = Column(Integer, primary_key=True) -# name = Column(String()) -# short_name = Column(String(), nullable=False) -# cross_reference = Column(Text(), nullable=False) -# comment = Column(Text(), 
nullable=False) -# is_spdx_official = Column(Boolean(), nullable=False) - -# __tablename__ = "licenses" -# __table_args__ = ( -# UniqueConstraint("short_name", name="uc_license_short_name"), -# {"schema": "spdx"}, -# ) - - -# # TODO: Need to a check - - -# class Packages(Base): -# package_id = Column(Integer, primary_key=True) -# name = Column(String(), nullable=False) -# version = Column(String(), nullable=False) -# file_name = Column(Text(), nullable=False) -# supplier_id = Column( -# Integer, -# ForeignKey("spdx.creators.creator_id", name="packages_supplier_id_fkey"), -# ) -# originator_id = Column( -# Integer, -# ForeignKey("spdx.creators.creator_id", name="packages_originator_id_fkey"), -# ) -# download_location = Column(Text()) -# verification_code = Column(String(), nullable=False) -# ver_code_excluded_file_id = Column( -# Integer, -# ForeignKey( -# "spdx.packages_files.package_file_id", name="fk_package_packages_files" -# ), -# ) -# sha256 = Column(String()) -# home_page = Column(Text()) -# source_info = Column(Text(), nullable=False) -# concluded_license_id = Column( -# Integer, -# ForeignKey( -# "spdx.licenses.license_id", name="packages_concluded_license_id_fkey" -# ), -# ) -# declared_license_id = Column( -# Integer, -# ForeignKey( -# "spdx.licenses.license_id", name="packages_declared_license_id_fkey" -# ), -# ) -# license_comment = Column(Text(), nullable=False) -# copyright_text = Column(Text()) -# summary = Column(Text(), nullable=False) -# description = Column(Text(), nullable=False) -# comment = Column(Text(), nullable=False) -# dosocs2_dir_code = Column(String()) - -# __tablename__ = "packages" -# __table_args__ = ( -# UniqueConstraint("sha256", name="uc_package_sha256"), -# UniqueConstraint( -# "verification_code", "dosocs2_dir_code", name="uc_dir_code_ver_code" -# ), -# {"schema": "spdx"}, -# ) - - -# class PackagesFiles(Base): -# package_file_id = Column(Integer, primary_key=True) -# package_id = Column( -# Integer, -# ForeignKey("spdx.packages.package_id", name="fk_package_files_packages"), -# nullable=False, -# ) -# file_id = Column( -# Integer, -# ForeignKey("spdx.files.file_id", name="packages_files_file_id_fkey"), -# nullable=False, -# ) -# concluded_license_id = Column( -# Integer, -# ForeignKey( -# "spdx.licenses.license_id", name="packages_files_concluded_license_id_fkey" -# ), -# ) -# license_comment = Column(Text(), nullable=False) -# file_name = Column(Text(), nullable=False) - -# __tablename__ = "packages_files" -# __table_args__ = ( -# UniqueConstraint("package_id", "file_name", name="uc_package_id_file_name"), -# {"schema": "spdx"}, -# ) - - -# class PackagesScans(Base): -# package_scan_id = Column(Integer, primary_key=True) -# package_id = Column( -# Integer, -# ForeignKey("spdx.packages.package_id", name="packages_scans_package_id_fkey"), -# nullable=False, -# ) -# scanner_id = Column( -# Integer, -# ForeignKey("spdx.scanners.scanner_id", name="packages_scans_scanner_id_fkey"), -# nullable=False, -# ) - -# __tablename__ = "packages_scans" -# __table_args__ = ( -# UniqueConstraint("package_id", "scanner_id", name="uc_package_scanner_id"), -# {"schema": "spdx"}, -# ) - - -# class Projects(Base): -# package_id = Column(Integer, primary_key=True) -# name = Column(Text(), nullable=False) -# homepage = Column(Text(), nullable=False) -# uri = Column(Text(), nullable=False) - -# __tablename__ = "projects" -# __table_args__ = {"schema": "spdx"} - - -# class RelationshipTypes(Base): -# relationship_type_id = Column(Integer, primary_key=True) -# name = 
Column(String(), nullable=False) - -# __tablename__ = "relationship_types" -# __table_args__ = ( -# UniqueConstraint("name", name="uc_relationship_type_name"), -# {"schema": "spdx"}, -# ) - - -# class Relationships(Base): -# relationship_id = Column(Integer, primary_key=True) -# left_identifier_id = Column( -# Integer, -# ForeignKey( -# "spdx.identifiers.identifier_id", -# name="relationships_left_identifier_id_fkey", -# ), -# nullable=False, -# ) -# right_identifier_id = Column( -# Integer, -# ForeignKey( -# "spdx.identifiers.identifier_id", -# name="relationships_right_identifier_id_fkey", -# ), -# nullable=False, -# ) -# relationship_type_id = Column( -# Integer, -# ForeignKey( -# "spdx.relationship_types.relationship_type_id", -# name="relationships_relationship_type_id_fkey", -# ), -# nullable=False, -# ) -# relationship_comment = Column(Text(), nullable=False) - -# __tablename__ = "relationships" -# __table_args__ = ( -# UniqueConstraint( -# "left_identifier_id", -# "right_identifier_id", -# "relationship_type_id", -# name="uc_left_right_relationship_type", -# ), -# {"schema": "spdx"}, -# ) - - -# class SbomScans(Base): -# sbom_scan_id = Column(Integer, primary_key=True) -# repo_id = Column(Integer) -# sbom_scan = Column(JSON()) - -# __tablename__ = "sbom_scans" -# __table_args__ = {"schema": "spdx"} - - -# class Scanners(Base): -# scanner_id = Column(Integer, primary_key=True) -# name = Column(String(), nullable=False) - -# __tablename__ = "scanners" -# __table_args__ = ( -# UniqueConstraint("name", name="uc_scanner_name"), -# {"schema": "spdx"}, -# ) From c39b9f2908319047b819bf331ea5604ccb945451 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 3 Dec 2025 14:03:01 -0500 Subject: [PATCH 047/104] specify `pr_review_body` as a User generated content string field for cleaning Signed-off-by: Adrian Edwards --- augur/tasks/github/pull_requests/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 812a4eef25..40f56e0ee5 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -403,7 +403,8 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") pr_review_natural_keys = ["pr_review_src_id",] - augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) + pr_review_string_fields = ["pr_review_body",] + augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys, string_fields=pr_review_string_fields) From 1a52bf5054d56b570ce1e5bda077332640f9d527 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 3 Dec 2025 14:25:04 -0500 Subject: [PATCH 048/104] Update augur/tasks/git/util/facade_worker/facade_worker/config.py Co-authored-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Signed-off-by: Shlok Gilda --- augur/tasks/git/util/facade_worker/facade_worker/config.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index 09f3c9d6ca..85097d6d54 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -298,10 +298,7 @@ def run_git_command(self, cmd: str, timeout: int, capture_output: bool = False, result = subprocess.run(cmd, **run_options) # Return 
appropriate output based on capture_output flag - if capture_output: - return result.returncode, result.stdout.strip() - else: - return result.returncode, '' + return result.returncode, (result.stdout.strip() if capture_output else '') except subprocess.TimeoutExpired: self.log_activity('Error', f'Git operation timed out: {operation_description}') return -1, '' From 4d10fc13b18eee2c4a939741dc2b2920dc220672 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Wed, 3 Dec 2025 19:09:10 -0500 Subject: [PATCH 049/104] Implement batched processing for pull request reviews and contributors Signed-off-by: Shlok Gilda --- augur/tasks/github/pull_requests/tasks.py | 135 ++++++++++++++-------- 1 file changed, 90 insertions(+), 45 deletions(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 812a4eef25..2468b663d4 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -327,9 +327,60 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - +def _flush_pr_review_batch(augur_db, contributors: list, pr_reviews: list, logger, owner: str, repo: str) -> None: + """ + Insert accumulated PR review batch data into the database. + + Handles contributor deduplication before insertion and bulk inserts both + contributors and PR reviews. Uses ON CONFLICT upsert logic via insert_data(). + + Args: + augur_db: DatabaseSession instance for database operations. + contributors: List of contributor dicts to insert. Will be deduplicated + using remove_duplicate_dicts() before insertion. + pr_reviews: List of PR review dicts to insert. + logger: Logger instance for status messages. + owner: Repository owner (for log messages). + repo: Repository name (for log messages). + + Returns: + None. Lists are NOT cleared by this function - caller must clear them. + """ + if contributors: + # Remove duplicates within the batch before inserting + unique_contributors = remove_duplicate_dicts(contributors) + logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(unique_contributors)} contributors") + augur_db.insert_data(unique_contributors, Contributor, ["cntrb_id"]) + + if pr_reviews: + logger.info(f"{owner}/{repo}: Inserting {len(pr_reviews)} pr reviews") + pr_review_natural_keys = ["pr_review_src_id"] + augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) + + @celery.task(base=AugurSecondaryRepoCollectionTask) def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: + """ + Collect pull request reviews for a repository from the GitHub API. + Fetches reviews for each PR and inserts them into the database along with + their associated contributors. Uses batched processing to limit memory + usage - processes reviews in batches of ~1000 instead of accumulating all + reviews in memory before insertion. + + Args: + repo_git: The repository's git URL (e.g., 'https://github.com/owner/repo'). + full_collection: If True, collects reviews for all PRs. If False, only + collects reviews for PRs updated since the last secondary collection. + + Returns: + None. Data is inserted directly into the database. + + Note: + - Inherits error handling from AugurSecondaryRepoCollectionTask base class. + - Contributors are deduplicated within each batch before insertion. + - Uses ON CONFLICT upsert logic to handle duplicate reviews gracefully. 
+ """ logger = logging.getLogger(collect_pull_request_reviews.__name__) owner, repo = get_owner_repo(repo_git) @@ -338,7 +389,6 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: tool_source = "pull_request_reviews" data_source = "Github API" - repo_id = get_repo_by_repo_git(repo_git).repo_id with GithubTaskManifest(logger) as manifest: augur_db = manifest.augur_db @@ -347,7 +397,6 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: repo_id = execute_session_query(query, 'one').repo_id if full_collection: - query = augur_db.session.query(PullRequest).filter(PullRequest.repo_id == repo_id).order_by(PullRequest.pr_src_number) prs = execute_session_query(query, 'all') else: @@ -355,66 +404,62 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: prs = get_updated_prs(repo_id, last_collected) pr_count = len(prs) + if pr_count == 0: + logger.debug(f"{owner}/{repo} No PRs to collect reviews for") + return + + logger.info(f"{owner}/{repo}: Collecting reviews for {pr_count} PRs") github_data_access = GithubDataAccess(manifest.key_auth, logger) - all_pr_reviews = {} - for index, pr in enumerate(prs): + # Batch processing: accumulate reviews until batch size reached, then flush + REVIEW_BATCH_SIZE = 1000 + contributors = [] + pr_review_dicts = [] + total_reviews_collected = 0 + for index, pr in enumerate(prs): pr_number = pr.pr_src_number pull_request_id = pr.pull_request_id - logger.debug(f"{owner}/{repo} Collecting Pr Reviews for pr {index + 1} of {pr_count}") + # Log progress every 100 PRs + if index % 100 == 0: + logger.debug(f"{owner}/{repo} Processing PR {index + 1} of {pr_count}") pr_review_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}/reviews" try: pr_reviews = list(github_data_access.paginate_resource(pr_review_url)) except UrlNotFoundException as e: - logger.warning(e) + logger.warning(f"{owner}/{repo} PR #{pr_number}: {e}") continue - if pr_reviews: - all_pr_reviews[pull_request_id] = pr_reviews - - if not list(all_pr_reviews.keys()): - logger.debug(f"{owner}/{repo} No pr reviews for repo") - return - - contributors = [] - for pull_request_id, reviews in all_pr_reviews.items(): - - for review in reviews: + # Single-pass extraction: get both contributor and review data together + for review in pr_reviews: + # Extract contributor contributor = process_pull_request_review_contributor(review, tool_source, tool_version, data_source) if contributor: contributors.append(contributor) - logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") - augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) - - - pr_reviews = [] - for pull_request_id, reviews in all_pr_reviews.items(): - - for review in reviews: - + # Extract review data (only if contributor was successfully linked) if "cntrb_id" in review: - pr_reviews.append(extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_source, tool_version)) - - logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") - pr_review_natural_keys = ["pr_review_src_id",] - augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) - - - - - - - - - - - - - - + pr_review_dicts.append( + extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_version, data_source) + ) + + # Flush batch when threshold reached + if len(pr_review_dicts) >= REVIEW_BATCH_SIZE: + _flush_pr_review_batch(augur_db, contributors, pr_review_dicts, 
logger, owner, repo) + total_reviews_collected += len(pr_review_dicts) + contributors.clear() + pr_review_dicts.clear() + + # Flush any remaining data + if pr_review_dicts: + _flush_pr_review_batch(augur_db, contributors, pr_review_dicts, logger, owner, repo) + total_reviews_collected += len(pr_review_dicts) + + if total_reviews_collected == 0: + logger.debug(f"{owner}/{repo} No pr reviews found for repo") + else: + logger.info(f"{owner}/{repo}: Completed - collected {total_reviews_collected} reviews total") \ No newline at end of file From 413dc22b6c6d7ae10e254f011da5d15e8713c310 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Tue, 9 Dec 2025 14:18:42 -0500 Subject: [PATCH 050/104] Implement batched processing for collecting pull request review comments and contributors Signed-off-by: Shlok Gilda --- augur/tasks/github/pull_requests/tasks.py | 263 ++++++++++++++-------- 1 file changed, 167 insertions(+), 96 deletions(-) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 2468b663d4..8fbe5f4951 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -11,12 +11,12 @@ from augur.application.db.models import PullRequest, Message, PullRequestReview, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, PullRequestReviewMessageRef, Contributor, Repo from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from augur.application.db.lib import get_session, get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors +from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors from augur.application.db.util import execute_session_query from ..messages import process_github_comment_contributors from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_core_data_last_collected -from typing import Generator, List, Dict +from typing import List platform_id = 1 @@ -182,30 +182,6 @@ def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): pr_metadata_natural_keys, string_fields=pr_metadata_string_fields) - - - - - - - - - - - - - - - - - - - - - - - - def process_pull_request_review_contributor(pr_review: dict, tool_source: str, tool_version: str, data_source: str): # get contributor data and set pr cntrb_id @@ -220,7 +196,27 @@ def process_pull_request_review_contributor(pr_review: dict, tool_source: str, t @celery.task(base=AugurSecondaryRepoCollectionTask) def collect_pull_request_review_comments(repo_git: str, full_collection: bool) -> None: + """ + Collect pull request review comments for a repository from the GitHub API. + Fetches review comments and inserts them into the database along with + their associated contributors. Uses batched processing to limit memory + usage - processes comments in batches of ~1000 instead of accumulating all + comments in memory before insertion. + + Args: + repo_git: The repository's git URL (e.g., 'https://github.com/owner/repo'). + full_collection: If True, collects all review comments. If False, only + collects comments created since the last secondary collection. + + Returns: + None. Data is inserted directly into the database. + + Note: + - Inherits error handling from AugurSecondaryRepoCollectionTask base class. 
+ - Contributors are deduplicated within each batch before insertion. + - Uses ON CONFLICT upsert logic to handle duplicate messages gracefully. + """ owner, repo = get_owner_repo(repo_git) review_msg_url = f"https://api.github.com/repos/{owner}/{repo}/pulls/comments" @@ -232,9 +228,9 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - if not full_collection: last_collected_date = get_secondary_data_last_collected(repo_id) - + if last_collected_date: - # subtract 2 days to ensure all data is collected + # Subtract 2 days to ensure all data is collected core_data_last_collected = (last_collected_date - timedelta(days=2)).replace(tzinfo=timezone.utc) review_msg_url += f"?since={core_data_last_collected.isoformat()}" else: @@ -242,11 +238,8 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - pr_reviews = get_pull_request_reviews_by_repo_id(repo_id) - # maps the github pr_review id to the auto incrementing pk that augur stores as pr_review id - pr_review_id_mapping = {} - for review in pr_reviews: - pr_review_id_mapping[review.pr_review_src_id] = review.pr_review_id - + # Build mapping once: github pr_review_src_id -> augur pr_review_id + pr_review_id_mapping = {review.pr_review_src_id: review.pr_review_id for review in pr_reviews} tool_source = "Pr review comment task" tool_version = "2.0" @@ -255,52 +248,156 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - key_auth = GithubRandomKeyAuth(logger) github_data_access = GithubDataAccess(key_auth, logger) - all_raw_pr_review_messages = list(github_data_access.paginate_resource(review_msg_url)) - + # Batch processing: accumulate comments until batch size reached, then flush + COMMENT_BATCH_SIZE = 1000 contributors = [] - for comment in all_raw_pr_review_messages: - + pr_review_comment_dicts = [] + pr_review_msg_mapping_data = {} + total_refs_inserted = 0 + + # Single-pass extraction: get both contributor and comment data together + for comment in github_data_access.paginate_resource(review_msg_url): + # Extract contributor _, contributor = process_github_comment_contributors(comment, tool_source, tool_version, data_source) if contributor is not None: contributors.append(contributor) - logger.info(f"{owner}/{repo} Pr review messages: Inserting {len(contributors)} contributors") - batch_insert_contributors(logger, contributors) + # Extract message data (only if it has a pr review id) + if comment.get("pull_request_review_id"): + pr_review_comment_dicts.append( + extract_needed_message_data(comment, platform_id, repo_id, tool_source, tool_version, data_source) + ) + # Map github message id to raw comment data for later ref creation + pr_review_msg_mapping_data[comment["id"]] = comment + + # Flush batch when threshold reached (check both to prevent unbounded growth) + if len(pr_review_comment_dicts) >= COMMENT_BATCH_SIZE or len(contributors) >= COMMENT_BATCH_SIZE: + refs_inserted = _flush_pr_review_comment_batch( + logger, contributors, pr_review_comment_dicts, pr_review_msg_mapping_data, + pr_review_id_mapping, repo_id, tool_version, data_source, owner, repo + ) + total_refs_inserted += refs_inserted + contributors.clear() + pr_review_comment_dicts.clear() + pr_review_msg_mapping_data.clear() + + # Flush any remaining data + if pr_review_comment_dicts: + refs_inserted = _flush_pr_review_comment_batch( + logger, contributors, pr_review_comment_dicts, pr_review_msg_mapping_data, + pr_review_id_mapping, repo_id, tool_version, data_source, owner, repo + ) + 
total_refs_inserted += refs_inserted + if total_refs_inserted == 0: + logger.debug(f"{owner}/{repo} No pr review comments found for repo") + else: + logger.info(f"{owner}/{repo}: Completed - collected {total_refs_inserted} pr review comment refs total") - pr_review_comment_dicts = [] - pr_review_msg_mapping_data = {} - pr_review_comments_len = len(all_raw_pr_review_messages) - for comment in all_raw_pr_review_messages: +def _flush_contributors(logger, contributors: list, owner: str, repo: str, context: str) -> None: + """ + Deduplicate and insert contributors for a batch. - # pull_request_review_id is required to map it to the correct pr review - if not comment["pull_request_review_id"]: - continue + Shared helper used by both PR review and PR review comment flush functions. + Handles deduplication via remove_duplicate_dicts() and bulk insert via + batch_insert_contributors(). - pr_review_comment_dicts.append( - extract_needed_message_data(comment, platform_id, repo_id, tool_source, tool_version, data_source) - ) + Args: + logger: Logger instance for status messages. + contributors: List of contributor dicts to insert. + owner: Repository owner (for log messages). + repo: Repository name (for log messages). + context: Description of what's being processed (e.g., "PR reviews", "PR review comments"). + """ + if contributors: + unique_contributors = remove_duplicate_dicts(contributors) + logger.info(f"{owner}/{repo} {context}: Inserting {len(unique_contributors)} contributors") + batch_insert_contributors(logger, unique_contributors) - # map github message id to the data that maps it to the pr review - github_msg_id = comment["id"] - pr_review_msg_mapping_data[github_msg_id] = comment +def _flush_pr_review_batch(augur_db, contributors: list, pr_reviews: list, logger, owner: str, repo: str) -> None: + """ + Insert accumulated PR review batch data into the database. + Handles contributor deduplication before insertion and bulk inserts both + contributors and PR reviews. Uses ON CONFLICT upsert logic via insert_data(). - logger.info(f"Inserting {len(pr_review_comment_dicts)} pr review comments") + Args: + augur_db: DatabaseSession instance for database operations. + contributors: List of contributor dicts to insert. Will be deduplicated + using remove_duplicate_dicts() before insertion. + pr_reviews: List of PR review dicts to insert. + logger: Logger instance for status messages. + owner: Repository owner (for log messages). + repo: Repository name (for log messages). + + Returns: + None. Lists are NOT cleared by this function - caller must clear them. + """ + _flush_contributors(logger, contributors, owner, repo, "PR reviews") + + if pr_reviews: + logger.info(f"{owner}/{repo}: Inserting {len(pr_reviews)} pr reviews") + pr_review_natural_keys = ["pr_review_src_id"] + augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) + + +def _flush_pr_review_comment_batch( + logger, + contributors: list, + pr_review_comment_dicts: list, + pr_review_msg_mapping_data: dict, + pr_review_id_mapping: dict, + repo_id: int, + tool_version: str, + data_source: str, + owner: str, + repo: str +) -> int: + """ + Insert accumulated PR review comment batch data into the database. + + Handles contributor deduplication before insertion, bulk inserts both + contributors and messages, then creates the message-to-review reference links. + Uses ON CONFLICT upsert logic via bulk_insert_dicts(). + + Args: + logger: Logger instance for status messages. + contributors: List of contributor dicts to insert. 
Will be deduplicated + using remove_duplicate_dicts() before insertion. + pr_review_comment_dicts: List of message dicts to insert into Message table. + pr_review_msg_mapping_data: Dict mapping github_msg_id to raw comment data + (needed for creating review refs after message insert). + pr_review_id_mapping: Dict mapping github pr_review_src_id to augur pr_review_id. + repo_id: The repository ID. + tool_version: Tool version string for metadata. + data_source: Data source string for metadata. + owner: Repository owner (for log messages). + repo: Repository name (for log messages). + + Returns: + Number of PR review message refs successfully inserted. + """ + _flush_contributors(logger, contributors, owner, repo, "PR review comments") + + if not pr_review_comment_dicts: + return 0 + + logger.info(f"{owner}/{repo}: Inserting {len(pr_review_comment_dicts)} pr review comments") message_natural_keys = ["platform_msg_id", "pltfrm_id"] message_return_columns = ["msg_id", "platform_msg_id"] message_string_fields = ["msg_text"] - message_return_data = bulk_insert_dicts(logger, pr_review_comment_dicts, Message, message_natural_keys, - return_columns=message_return_columns, string_fields=message_string_fields) - if message_return_data is None: - return + message_return_data = bulk_insert_dicts( + logger, pr_review_comment_dicts, Message, message_natural_keys, + return_columns=message_return_columns, string_fields=message_string_fields + ) + if message_return_data is None: + return 0 pr_review_message_ref_insert_data = [] for data in message_return_data: - augur_msg_id = data["msg_id"] github_msg_id = data["platform_msg_id"] @@ -315,47 +412,21 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - logger.warning(f"{owner}/{repo}: Could not find related pr review. We were searching for pr review with id: {github_pr_review_id}") continue - pr_review_message_ref = extract_pr_review_message_ref_data(comment, augur_pr_review_id, github_pr_review_id, repo_id, tool_version, data_source) + pr_review_message_ref = extract_pr_review_message_ref_data( + comment, augur_pr_review_id, github_pr_review_id, repo_id, tool_version, data_source + ) pr_review_message_ref_insert_data.append(pr_review_message_ref) + if pr_review_message_ref_insert_data: + logger.info(f"{owner}/{repo}: Inserting {len(pr_review_message_ref_insert_data)} pr review refs") + pr_comment_ref_natural_keys = ["pr_review_msg_src_id"] + pr_review_msg_ref_string_columns = ["pr_review_msg_diff_hunk"] + bulk_insert_dicts( + logger, pr_review_message_ref_insert_data, PullRequestReviewMessageRef, + pr_comment_ref_natural_keys, string_fields=pr_review_msg_ref_string_columns + ) - logger.info(f"Inserting {len(pr_review_message_ref_insert_data)} pr review refs") - pr_comment_ref_natural_keys = ["pr_review_msg_src_id"] - pr_review_msg_ref_string_columns = ["pr_review_msg_diff_hunk"] - bulk_insert_dicts(logger, pr_review_message_ref_insert_data, PullRequestReviewMessageRef, pr_comment_ref_natural_keys, string_fields=pr_review_msg_ref_string_columns) - - - - -def _flush_pr_review_batch(augur_db, contributors: list, pr_reviews: list, logger, owner: str, repo: str) -> None: - """ - Insert accumulated PR review batch data into the database. - - Handles contributor deduplication before insertion and bulk inserts both - contributors and PR reviews. Uses ON CONFLICT upsert logic via insert_data(). - - Args: - augur_db: DatabaseSession instance for database operations. - contributors: List of contributor dicts to insert. 
Will be deduplicated - using remove_duplicate_dicts() before insertion. - pr_reviews: List of PR review dicts to insert. - logger: Logger instance for status messages. - owner: Repository owner (for log messages). - repo: Repository name (for log messages). - - Returns: - None. Lists are NOT cleared by this function - caller must clear them. - """ - if contributors: - # Remove duplicates within the batch before inserting - unique_contributors = remove_duplicate_dicts(contributors) - logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(unique_contributors)} contributors") - augur_db.insert_data(unique_contributors, Contributor, ["cntrb_id"]) - - if pr_reviews: - logger.info(f"{owner}/{repo}: Inserting {len(pr_reviews)} pr reviews") - pr_review_natural_keys = ["pr_review_src_id"] - augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys) + return len(pr_review_message_ref_insert_data) @celery.task(base=AugurSecondaryRepoCollectionTask) From 1acb41bbd3a1979425485bab06b5b17f8fb688e8 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 13 Nov 2025 17:02:15 -0500 Subject: [PATCH 051/104] formatting: move table name and schema attributes up top for consistency Signed-off-by: Adrian Edwards --- .../application/db/models/augur_operations.py | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 45ac1d8167..100cfd24dc 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -217,6 +217,10 @@ class WorkerSettingsFacade(Base): ) class BadgingDEI(Base): + __tablename__ = 'dei_badging' + __table_args__ = ( + {"schema": "augur_data"} + ) id = Column(Integer, primary_key=True, nullable=False) badging_id = Column(Integer, nullable=False) level = Column(String, nullable=False) @@ -227,27 +231,31 @@ class BadgingDEI(Base): repo = relationship("Repo") - __tablename__ = 'dei_badging' + +class Config(Base): + __tablename__ = 'config' __table_args__ = ( - {"schema": "augur_data"} + UniqueConstraint('section_name', "setting_name", name='unique-config-setting'), + {"schema": "augur_operations"} ) - -class Config(Base): id = Column(SmallInteger, primary_key=True, nullable=False) section_name = Column(String, nullable=False) setting_name = Column(String, nullable=False) value = Column(String) type = Column(String) - __tablename__ = 'config' - __table_args__ = ( - UniqueConstraint('section_name', "setting_name", name='unique-config-setting'), - {"schema": "augur_operations"} - ) + # add admit column to database class User(Base): + __tablename__ = 'users' + __table_args__ = ( + UniqueConstraint('email', name='user-unique-email'), + UniqueConstraint('login_name', name='user-unique-name'), + UniqueConstraint('text_phone', name='user-unique-phone'), + {"schema": "augur_operations"} + ) user_id = Column(Integer, primary_key=True) login_name = Column(String, nullable=False) @@ -262,13 +270,6 @@ class User(Base): data_source = Column(String) data_collection_date = Column(TIMESTAMP(precision=0), server_default=text("CURRENT_TIMESTAMP")) - __tablename__ = 'users' - __table_args__ = ( - UniqueConstraint('email', name='user-unique-email'), - UniqueConstraint('login_name', name='user-unique-name'), - UniqueConstraint('text_phone', name='user-unique-phone'), - {"schema": "augur_operations"} - ) groups = relationship("UserGroup", back_populates="user") tokens = relationship("UserSessionToken", 
back_populates="user") @@ -629,17 +630,19 @@ def compute_hashsed_password(password): class UserGroup(Base): + __tablename__ = 'user_groups' + __table_args__ = ( + UniqueConstraint('user_id', 'name', name='user_group_unique'), + {"schema": "augur_operations"} + ) + group_id = Column(BigInteger, primary_key=True) user_id = Column(Integer, ForeignKey("augur_operations.users.user_id", name="user_group_user_id_fkey") ) name = Column(String, nullable=False) favorited = Column(Boolean, nullable=False, server_default=text("FALSE")) - __tablename__ = 'user_groups' - __table_args__ = ( - UniqueConstraint('user_id', 'name', name='user_group_unique'), - {"schema": "augur_operations"} - ) + user = relationship("User", back_populates="groups") repos = relationship("UserRepo", back_populates="group") From f2929f764402c8176c5af06555d4b0bdaeae06cd Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 14 Nov 2025 16:23:21 -0500 Subject: [PATCH 052/104] table_args formatting Signed-off-by: Adrian Edwards --- augur/application/db/models/augur_data.py | 18 ++++++++---- .../application/db/models/augur_operations.py | 28 ++++--------------- augur/application/db/models/spdx.py | 25 +++++++++++++---- 3 files changed, 37 insertions(+), 34 deletions(-) diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 034a2bec01..9f7d8c7fb3 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -1359,7 +1359,8 @@ class Commit(Base): class CommitMessage(Base): __tablename__ = "commit_messages" - __table_args__ = ( UniqueConstraint("repo_id","cmt_hash", name="commit-message-insert-unique"), + __table_args__ = ( + UniqueConstraint("repo_id","cmt_hash", name="commit-message-insert-unique"), { "schema": "augur_data", "comment": "This table holds commit messages", @@ -1930,9 +1931,12 @@ class RepoClusterMessage(Base): class RepoDependency(Base): __tablename__ = "repo_dependencies" - __table_args__ = ( UniqueConstraint("repo_id","dep_name","data_collection_date", name="deps-insert-unique"), - {"schema": "augur_data", - "comment": "Contains the dependencies for a repo.",}, + __table_args__ = ( + UniqueConstraint("repo_id","dep_name","data_collection_date", name="deps-insert-unique"), + { + "schema": "augur_data", + "comment": "Contains the dependencies for a repo." 
+ }, ) repo_dependencies_id = Column( @@ -1960,7 +1964,8 @@ class RepoDependency(Base): class RepoDepsLibyear(Base): __tablename__ = "repo_deps_libyear" - __table_args__ = ( UniqueConstraint("repo_id","name", "data_collection_date", name="deps-libyear-insert-unique"), + __table_args__ = ( + UniqueConstraint("repo_id","name", "data_collection_date", name="deps-libyear-insert-unique"), {"schema": "augur_data"} ) @@ -1993,7 +1998,8 @@ class RepoDepsLibyear(Base): class RepoDepsScorecard(Base): __tablename__ = "repo_deps_scorecard" - __table_args__ = ( UniqueConstraint("repo_id","name", name="deps-scorecard-insert-unique"), + __table_args__ = ( + UniqueConstraint("repo_id","name", name="deps-scorecard-insert-unique"), {"schema": "augur_data"} ) diff --git a/augur/application/db/models/augur_operations.py b/augur/application/db/models/augur_operations.py index 100cfd24dc..12bb94e6cf 100644 --- a/augur/application/db/models/augur_operations.py +++ b/augur/application/db/models/augur_operations.py @@ -218,9 +218,7 @@ class WorkerSettingsFacade(Base): class BadgingDEI(Base): __tablename__ = 'dei_badging' - __table_args__ = ( - {"schema": "augur_data"} - ) + __table_args__ = {"schema": "augur_data"} id = Column(Integer, primary_key=True, nullable=False) badging_id = Column(Integer, nullable=False) level = Column(String, nullable=False) @@ -742,11 +740,7 @@ def convert_group_name_to_id(session, user_id: int, group_name: str) -> int: class UserRepo(Base): __tablename__ = "user_repos" - __table_args__ = ( - { - "schema": "augur_operations" - } - ) + __table_args__ = { "schema": "augur_operations" } group_id = Column( ForeignKey("augur_operations.user_groups.group_id", name="user_repo_group_id_fkey"), primary_key=True, nullable=False @@ -1013,11 +1007,7 @@ def add_github_org_repos(session, url: List[str], user_id: int, group_name: int) class UserSessionToken(Base): __tablename__ = "user_session_tokens" - __table_args__ = ( - { - "schema": "augur_operations" - } - ) + __table_args__ = { "schema": "augur_operations" } token = Column(String, primary_key=True, nullable=False) user_id = Column(ForeignKey("augur_operations.users.user_id", name="user_session_token_user_id_fkey")) @@ -1055,11 +1045,7 @@ def delete_refresh_tokens(self, session): class ClientApplication(Base): __tablename__ = "client_applications" - __table_args__ = ( - { - "schema": "augur_operations" - } - ) + __table_args__ = { "schema": "augur_operations" } id = Column(String, primary_key=True, nullable=False) user_id = Column(ForeignKey("augur_operations.users.user_id", name="client_application_user_id_fkey"), nullable=False) @@ -1086,11 +1072,7 @@ def get_by_id(session, client_id): class Subscription(Base): __tablename__ = "subscriptions" - __table_args__ = ( - { - "schema": "augur_operations" - } - ) + __table_args__ = { "schema": "augur_operations" } application_id = Column(ForeignKey("augur_operations.client_applications.id", name="subscriptions_application_id_fkey"), primary_key=True) type_id = Column(ForeignKey("augur_operations.subscription_types.id", name="subscriptions_type_id_fkey"), primary_key=True) diff --git a/augur/application/db/models/spdx.py b/augur/application/db/models/spdx.py index 9e4949cccb..4e981dc54a 100644 --- a/augur/application/db/models/spdx.py +++ b/augur/application/db/models/spdx.py @@ -176,7 +176,10 @@ class SpdxPackage(Base): class SpdxPackagesFile(Base): __tablename__ = "packages_files" - __table_args__ = (UniqueConstraint("package_id", "file_name"), {"schema": "spdx"}) + __table_args__ = ( + 
UniqueConstraint("package_id", "file_name"), + {"schema": "spdx"} + ) package_file_id = Column( Integer, @@ -312,7 +315,10 @@ class SpdxFileContributor(Base): class SpdxFilesLicense(Base): __tablename__ = "files_licenses" - __table_args__ = (UniqueConstraint("file_id", "license_id"), {"schema": "spdx"}) + __table_args__ = ( + UniqueConstraint("file_id", "license_id"), + {"schema": "spdx"} + ) file_license_id = Column( Integer, @@ -331,7 +337,10 @@ class SpdxFilesLicense(Base): class SpdxFilesScan(Base): __tablename__ = "files_scans" - __table_args__ = (UniqueConstraint("file_id", "scanner_id"), {"schema": "spdx"}) + __table_args__ = ( + UniqueConstraint("file_id", "scanner_id"), + {"schema": "spdx"} + ) file_scan_id = Column( Integer, @@ -347,7 +356,10 @@ class SpdxFilesScan(Base): class SpdxPackagesScan(Base): __tablename__ = "packages_scans" - __table_args__ = (UniqueConstraint("package_id", "scanner_id"), {"schema": "spdx"}) + __table_args__ = ( + UniqueConstraint("package_id", "scanner_id"), + {"schema": "spdx"} + ) package_scan_id = Column( Integer, @@ -383,7 +395,10 @@ class SpdxDocumentsCreator(Base): class SpdxExternalRef(Base): __tablename__ = "external_refs" - __table_args__ = (UniqueConstraint("document_id", "id_string"), {"schema": "spdx"}) + __table_args__ = ( + UniqueConstraint("document_id", "id_string"), + {"schema": "spdx"} + ) external_ref_id = Column( Integer, From 79fa279816d0d58dfb0a506fecd7e98022d83eb6 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:05:25 -0500 Subject: [PATCH 053/104] fix: Use list.clear() in facade tasks to reduce memory overhead Signed-off-by: Shlok Gilda --- augur/tasks/git/facade_tasks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index b0d638768f..08594a231a 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -212,7 +212,7 @@ def facade_fetch_missing_commit_messages(repo_git): if len(to_insert) >= 1000: bulk_insert_dicts(logger,to_insert, CommitMessage, ["repo_id","cmt_hash"]) - to_insert = [] + to_insert.clear() to_insert.append(msg_record) except Exception as e: @@ -313,13 +313,14 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: ) if pendingCommitRecordsToInsert: facade_bulk_insert_commits(logger, pendingCommitRecordsToInsert) - pendingCommitRecordsToInsert = [] + pendingCommitRecordsToInsert.clear() if commit_msg: pendingCommitMessageRecordsToInsert.append(commit_msg) if len(pendingCommitMessageRecordsToInsert) >= 1000: bulk_insert_dicts(logger, pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id", "cmt_hash"]) + pendingCommitMessageRecordsToInsert.clear() # FINAL MESSAGE INSERT bulk_insert_dicts(logger, pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id", "cmt_hash"]) From 836544d9dcdc844657a2608e27a58871e646fe87 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:05:46 -0500 Subject: [PATCH 054/104] fix: Process facade contributor results in batches Signed-off-by: Shlok Gilda --- augur/tasks/github/facade_github/tasks.py | 33 +++++++++++++++++++---- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index eff64df6ee..3396de7b64 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -252,7 +252,6 @@ def insert_facade_contributors(self, repo_git): #Execute statement with session. 
result = execute_sql(new_contrib_sql) - new_contribs = [dict(row) for row in result.mappings()] #print(new_contribs) @@ -262,7 +261,20 @@ def insert_facade_contributors(self, repo_git): key_auth = GithubRandomKeyAuth(logger) - process_commit_metadata(logger, key_auth, list(new_contribs), repo_id, platform_id) + # Process results in batches to reduce memory usage + batch = [] + BATCH_SIZE = 1000 + + for row in result.mappings(): + batch.append(dict(row)) + + if len(batch) >= BATCH_SIZE: + process_commit_metadata(logger, key_auth, batch, repo_id, platform_id) + batch.clear() + + # Process remaining items in batch + if batch: + process_commit_metadata(logger, key_auth, batch, repo_id, platform_id) logger.debug("DEBUG: Got through the new_contribs") @@ -300,10 +312,21 @@ def insert_facade_contributors(self, repo_git): result = execute_sql(resolve_email_to_cntrb_id_sql) - existing_cntrb_emails = [dict(row) for row in result.mappings()] - print(existing_cntrb_emails) - link_commits_to_contributor(logger, facade_helper,list(existing_cntrb_emails)) + # Process results in batches to reduce memory usage + batch = [] + BATCH_SIZE = 1000 + + for row in result.mappings(): + batch.append(dict(row)) + + if len(batch) >= BATCH_SIZE: + link_commits_to_contributor(logger, facade_helper, batch) + batch.clear() + + # Process remaining items in batch + if batch: + link_commits_to_contributor(logger, facade_helper, batch) return From 7f502bd94e5acecc9aea315eafff3d764212dca6 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:06:26 -0500 Subject: [PATCH 055/104] fix: Convert issues collection to generator pattern with batching Signed-off-by: Shlok Gilda --- augur/tasks/github/issues.py | 68 ++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 14 deletions(-) diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index 37bee5c8dd..d100d511bc 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -1,6 +1,6 @@ import logging import traceback -from datetime import timedelta, timezone +from datetime import timedelta, timezone, datetime from sqlalchemy.exc import IntegrityError @@ -20,9 +20,21 @@ development = get_development_flag() @celery.task(base=AugurCoreRepoCollectionTask) -def collect_issues(repo_git : str, full_collection: bool) -> int: +def collect_issues(repo_git: str, full_collection: bool) -> int: + """ + Collect all issues (excluding pull requests) for a repository. - logger = logging.getLogger(collect_issues.__name__) + Retrieves issues from GitHub API in batches of 1000 and inserts them along with + related labels, assignees, and contributors. 
+ + Args: + repo_git: Full git URL (e.g., 'https://github.com/chaoss/augur') + full_collection: True for all historical data, False for incremental (last collection - 2 days) + + Returns: + Number of issues collected, or -1 on error + """ + logger = logging.getLogger(collect_issues.__name__) repo_id = get_repo_by_repo_git(repo_git).repo_id @@ -31,33 +43,60 @@ def collect_issues(repo_git : str, full_collection: bool) -> int: if full_collection: core_data_last_collected = None else: - # subtract 2 days to ensure all data is collected + # Subtract 2 days to ensure all data is collected core_data_last_collected = (get_core_data_last_collected(repo_id) - timedelta(days=2)).replace(tzinfo=timezone.utc) key_auth = GithubRandomKeyAuth(logger) logger.info(f'this is the manifest.key_auth value: {str(key_auth)}') - try: - issue_data = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) + try: + issue_data_generator = retrieve_all_issue_data(repo_git, logger, key_auth, core_data_last_collected) - if not issue_data: - logger.info(f"{owner}/{repo} has no issues") - return 0 + # Process issues in batches to avoid memory spikes + batch = [] + total_issues = 0 + batch_size = 1000 + + for issue in issue_data_generator: + batch.append(issue) - total_issues = len(issue_data) - process_issues(issue_data, f"{owner}/{repo}: Issue task", repo_id, logger) + if len(batch) >= batch_size: + logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues})") + process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) + total_issues += len(batch) + batch.clear() + + # Process remaining issues in the last batch + if len(batch) > 0: + logger.info(f"{owner}/{repo}: Processing final batch of {len(batch)} issues") + process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) + total_issues += len(batch) + + if total_issues == 0: + logger.info(f"{owner}/{repo} has no issues") return total_issues - + except Exception as e: logger.error(f"Could not collect issues for repo {repo_git}\n Reason: {e} \n Traceback: {''.join(traceback.format_exception(None, e, e.__traceback__))}") return -1 -def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: +def retrieve_all_issue_data(repo_git: str, logger:logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): + """ + Retrieve all issue data for a repository as a generator. + + Returns a generator to avoid materializing all issues in memory at once. + This is critical for repos with 10,000+ issues to prevent memory spikes. + Args: + repo_git (str): The GitHub repository in "owner/repo" format. + logger (logging.Logger): Logger for logging messages. + key_auth (GithubRandomKeyAuth): Auth handler for GitHub API. + since (datetime, optional): Only issues updated since this datetime will be retrieved. 
+ """ owner, repo = get_owner_repo(repo_git) logger.info(f"Collecting issues for {owner}/{repo}") @@ -74,7 +113,8 @@ def retrieve_all_issue_data(repo_git, logger, key_auth, since) -> None: issues_paginator = github_data_access.paginate_resource(url) - return list(issues_paginator) + # Return the generator directly instead of materializing it + return issues_paginator def process_issues(issues, task_name, repo_id, logger) -> None: From 44e6967d70491a6fdf9148bf93628d63e3793184 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 11:06:50 -0500 Subject: [PATCH 056/104] fix: Add batch processing to PR commits and files collection Signed-off-by: Shlok Gilda --- .../pull_requests/commits_model/core.py | 14 ++++++--- .../github/pull_requests/files_model/core.py | 14 ++++++--- augur/tasks/github/pull_requests/tasks.py | 29 ++++++------------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index 2df6d66f5d..83b283bb6d 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -43,13 +43,15 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti logger.info(f"Getting pull request commits for repo: {repo.repo_git}") github_data_access = GithubDataAccess(key_auth, logger) - + + BATCH_SIZE = 1000 + pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] all_data = [] for index,pr_info in enumerate(pr_urls): logger.info(f'{task_name}: Querying commits for pull request #{index + 1} of {len(pr_urls)}') commits_url = pr_info['pr_url'] + '/commits?state=all' - + if not pr_info.get('pr_url'): logger.warning(f"{task_name}: No pr_url found for pull request info: {pr_info}. Skipping.") continue @@ -70,13 +72,17 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti 'repo_id': repo.repo_id, } all_data.append(pr_commit_row) + + if len(all_data) >= BATCH_SIZE: + logger.info(f"{task_name}: Inserting {len(all_data)} rows") + augur_db.insert_data(all_data,PullRequestCommit,pr_commits_natural_keys) + all_data.clear() except UrlNotFoundException: logger.info(f"{task_name}: PR with url of {pr_info['pr_url']} returned 404 on commit data. 
Skipping.") continue - + if len(all_data) > 0: logger.info(f"{task_name}: Inserting {len(all_data)} rows") - pr_commits_natural_keys = ["pull_request_id", "repo_id", "pr_cmt_sha"] augur_db.insert_data(all_data,PullRequestCommit,pr_commits_natural_keys) diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index cbecb44d6d..60222a3bc1 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -40,12 +40,14 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) + BATCH_SIZE = 1000 + pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] pr_file_rows = [] logger.info(f"Getting pull request files for repo: {repo.repo_git}") for index, pr_info in enumerate(pr_numbers): logger.info(f'Querying files for pull request #{index + 1} of {len(pr_numbers)}') - + query = """ query($repo: String!, $owner: String!,$pr_number: Int!, $numRecords: Int!, $cursor: String) { repository(name: $repo, owner: $owner) { @@ -68,7 +70,7 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection } } """ - + values = ["repository", "pullRequest", "files"] params = { 'owner': owner, @@ -92,6 +94,11 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection } pr_file_rows.append(data) + + if len(pr_file_rows) >= BATCH_SIZE: + logger.info(f"{task_name}: Inserting {len(pr_file_rows)} rows") + augur_db.insert_data(pr_file_rows, PullRequestFile, pr_file_natural_keys) + pr_file_rows.clear() except NotFoundException as e: logger.info(f"{task_name}: PR with number of {pr_info['pr_src_number']} returned 404 on file data. 
Skipping.") continue @@ -101,6 +108,5 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection if len(pr_file_rows) > 0: - # Execute a bulk upsert with sqlalchemy - pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] + logger.info(f"{task_name}: Inserting {len(pr_file_rows)} rows") augur_db.insert_data(pr_file_rows, PullRequestFile, pr_file_natural_keys) diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 40f56e0ee5..1fbfec060a 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -381,6 +381,7 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: logger.debug(f"{owner}/{repo} No pr reviews for repo") return + # Process contributors (all_pr_reviews already in memory, so no OOM risk) contributors = [] for pull_request_id, reviews in all_pr_reviews.items(): @@ -389,33 +390,21 @@ def collect_pull_request_reviews(repo_git: str, full_collection: bool) -> None: if contributor: contributors.append(contributor) - logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") - augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) + logger.info(f"{owner}/{repo} Pr reviews: Inserting {len(contributors)} contributors") + augur_db.insert_data(contributors, Contributor, ["cntrb_id"]) + # Process pr reviews (all_pr_reviews already in memory, so no OOM risk) pr_reviews = [] for pull_request_id, reviews in all_pr_reviews.items(): for review in reviews: - + if "cntrb_id" in review: pr_reviews.append(extract_needed_pr_review_data(review, pull_request_id, repo_id, platform_id, tool_source, tool_version)) - logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") - pr_review_natural_keys = ["pr_review_src_id",] - pr_review_string_fields = ["pr_review_body",] - augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys, string_fields=pr_review_string_fields) - - - - - - - - - - - - - + logger.info(f"{owner}/{repo}: Inserting pr reviews of length: {len(pr_reviews)}") + pr_review_natural_keys = ["pr_review_src_id",] + pr_review_string_fields = ["pr_review_body",] + augur_db.insert_data(pr_reviews, PullRequestReview, pr_review_natural_keys, string_fields=pr_review_string_fields) From ab2fd7b72bde2e7ccd3ec65ac71c65b2e7c30e46 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Thu, 20 Nov 2025 11:42:45 -0600 Subject: [PATCH 057/104] Update augur/tasks/github/issues.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Sean P. Goggins --- augur/tasks/github/issues.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index d100d511bc..68cae4d30c 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -84,7 +84,7 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: -def retrieve_all_issue_data(repo_git: str, logger:logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): +def retrieve_all_issue_data(repo_git: str, logger: logging.Logger, key_auth: GithubRandomKeyAuth, since: datetime | None = None): """ Retrieve all issue data for a repository as a generator. From 0d068dc8c68016f12f75cfdfbf25409162cf4bbf Mon Sep 17 00:00:00 2001 From: "Sean P. 
Goggins" Date: Thu, 20 Nov 2025 11:43:41 -0600 Subject: [PATCH 058/104] Update augur/tasks/github/issues.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Sean P. Goggins --- augur/tasks/github/issues.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index 68cae4d30c..aaca35ed5f 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -62,7 +62,7 @@ def collect_issues(repo_git: str, full_collection: bool) -> int: batch.append(issue) if len(batch) >= batch_size: - logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues})") + logger.info(f"{owner}/{repo}: Processing batch of {len(batch)} issues (total so far: {total_issues + len(batch)})") process_issues(batch, f"{owner}/{repo}: Issue task", repo_id, logger) total_issues += len(batch) batch.clear() From b5eac7a48d9de6f89049ceab7c8474629f4ed3d4 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Thu, 20 Nov 2025 17:05:21 -0500 Subject: [PATCH 059/104] fix: Optimize database cursor usage by fetching results immediately in insert_facade_contributors Signed-off-by: Shlok Gilda --- augur/tasks/github/facade_github/tasks.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 3396de7b64..73fd9a51b5 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -253,6 +253,10 @@ def insert_facade_contributors(self, repo_git): #Execute statement with session. result = execute_sql(new_contrib_sql) + # Fetch all results immediately to close the database cursor/connection + # This prevents holding the connection open during GitHub API calls + rows = result.mappings().fetchall() + #print(new_contribs) #json.loads(pd.read_sql(new_contrib_sql, self.db, params={ @@ -265,7 +269,7 @@ def insert_facade_contributors(self, repo_git): batch = [] BATCH_SIZE = 1000 - for row in result.mappings(): + for row in rows: batch.append(dict(row)) if len(batch) >= BATCH_SIZE: @@ -313,11 +317,15 @@ def insert_facade_contributors(self, repo_git): result = execute_sql(resolve_email_to_cntrb_id_sql) + # Fetch all results immediately to close the database cursor/connection + # This prevents holding the connection open during database UPDATE operations + rows = result.mappings().fetchall() + # Process results in batches to reduce memory usage batch = [] BATCH_SIZE = 1000 - for row in result.mappings(): + for row in rows: batch.append(dict(row)) if len(batch) >= BATCH_SIZE: From f9052cbfe0b9d711002cbf02a6d12327cf1d6eb6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 1 Dec 2025 15:29:49 -0500 Subject: [PATCH 060/104] Pylint and other style fixes Signed-off-by: Adrian Edwards --- augur/tasks/git/facade_tasks.py | 2 +- augur/tasks/github/facade_github/tasks.py | 1 - augur/tasks/github/issues.py | 2 +- augur/tasks/github/pull_requests/tasks.py | 34 ++++------------------- 4 files changed, 7 insertions(+), 32 deletions(-) diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index 08594a231a..0726b6df11 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -255,7 +255,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: facade_helper.log_activity('Debug',f"Commits missing from repo {repo_id}: {len(missing_commits)}") - if not len(missing_commits) or repo_id is 
None: + if not missing_commits or repo_id is None: #session.log_activity('Info','Type of missing_commits: %s' % type(missing_commits)) return diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 73fd9a51b5..53a3d6648a 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -5,7 +5,6 @@ from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from augur.application.db.models import Contributor from augur.tasks.github.facade_github.core import * from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors from augur.application.db.lib import get_session, execute_session_query diff --git a/augur/tasks/github/issues.py b/augur/tasks/github/issues.py index aaca35ed5f..91e56deaf7 100644 --- a/augur/tasks/github/issues.py +++ b/augur/tasks/github/issues.py @@ -12,7 +12,7 @@ from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth from augur.tasks.github.util.util import add_key_value_pair_to_dicts, get_owner_repo from augur.tasks.util.worker_util import remove_duplicate_dicts -from augur.application.db.models import Issue, IssueLabel, IssueAssignee, Contributor +from augur.application.db.models import Issue, IssueLabel, IssueAssignee from augur.application.config import get_development_flag from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_core_data_last_collected, batch_insert_contributors diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index 1fbfec060a..d4d0b3114d 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -11,12 +11,12 @@ from augur.application.db.models import PullRequest, Message, PullRequestReview, PullRequestLabel, PullRequestReviewer, PullRequestMeta, PullRequestAssignee, PullRequestReviewMessageRef, Contributor, Repo from augur.tasks.github.util.github_task_session import GithubTaskManifest from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth -from augur.application.db.lib import get_session, get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors +from augur.application.db.lib import get_repo_by_repo_git, bulk_insert_dicts, get_pull_request_reviews_by_repo_id, batch_insert_contributors from augur.application.db.util import execute_session_query from ..messages import process_github_comment_contributors from augur.application.db.lib import get_secondary_data_last_collected, get_updated_prs, get_core_data_last_collected -from typing import Generator, List, Dict +from typing import List platform_id = 1 @@ -52,15 +52,15 @@ def collect_pull_requests(repo_git: str, full_collection: bool) -> int: total_count += len(all_data) all_data.clear() - if len(all_data): + if all_data: process_pull_requests(all_data, f"{owner}/{repo}: Github Pr task", repo_id, logger, augur_db) total_count += len(all_data) if total_count > 0: - return total_count - else: logger.debug(f"{owner}/{repo} has no pull requests") return 0 + + return total_count @@ -182,30 +182,6 @@ def process_pull_requests(pull_requests, task_name, repo_id, logger, augur_db): 
pr_metadata_natural_keys, string_fields=pr_metadata_string_fields) - - - - - - - - - - - - - - - - - - - - - - - - def process_pull_request_review_contributor(pr_review: dict, tool_source: str, tool_version: str, data_source: str): # get contributor data and set pr cntrb_id From 29edd21a196a91231f0f59c5d79cbd6eb65e13b4 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 16:31:14 -0400 Subject: [PATCH 061/104] swap code to using tomli Signed-off-by: Adrian Edwards --- .../libyear_util/pypi_parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py index fa4a1d7b63..1bf56f202a 100644 --- a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py +++ b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py @@ -1,6 +1,6 @@ import re, os import json -import toml +import tomli import logging import yaml @@ -93,15 +93,15 @@ def map_dependencies_pipfile(packages, type): #def parse_pipfile(file_handle): -# manifest = toml.load(file_handle) +# manifest = tomli.load(file_handle) # return map_dependencies_pipfile(manifest['packages'],'runtime') + #map_dependencies_pipfile(manifest['dev-packages'], 'develop') ## Erro handling Means that the parse_pipfile(...) old function is assuming the presence of a dev-packages key in the parsed Pipfile, but that key does not exist in some cases. def parse_pipfile(file_handle): - import toml + import tomli try: - manifest = toml.load(file_handle) + manifest = tomli.load(file_handle) except Exception as e: logging.warning(f"Failed to parse Pipfile: {getattr(file_handle, 'name', 'unknown')}, error: {e}") return [] @@ -154,8 +154,8 @@ def parse_setup_py(file_handle): def parse_poetry(file_handle, repo_id=None, path=None): file_name = getattr(file_handle, 'name', 'unknown') try: - manifest = toml.load(file_handle) - except toml.TomlDecodeError as e: + manifest = tomli.load(file_handle) + except tomli.TomlDecodeError as e: logging.warning(f"[Repo ID: {repo_id}] Skipping malformed TOML file: {file_name} at {path}, error: {e}") return [] except Exception as e: @@ -172,7 +172,7 @@ def parse_poetry(file_handle, repo_id=None, path=None): def parse_poetry_lock(file_handle): - manifest = toml.load(file_handle) + manifest = tomli.load(file_handle) deps = list() group = 'runtime' for package in manifest['package']: From 53953694060e81c35d1eeb01fd17103e04a51072 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 16:31:59 -0400 Subject: [PATCH 062/104] swap toml package in dependencies Signed-off-by: Adrian Edwards --- pyproject.toml | 2 +- uv.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 908558f239..8f99591e75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,7 @@ dependencies = [ "tensorflow==2.15.0", "textblob==0.15.3", "textual>=0.73.0", - "toml", + "tomli>=2.2.1", "toolz>=0.8.2", "tornado==6.4.1", "typing-extensions>=4.7", diff --git a/uv.lock b/uv.lock index 819bc2be00..1631362afb 100644 --- a/uv.lock +++ b/uv.lock @@ -207,7 +207,7 @@ dependencies = [ { name = "tensorflow" }, { name = "textblob" }, { name = "textual" }, - { name = "toml" }, + { name = "tomli" }, { name = "toolz" }, { name = "tornado" }, { name = "typing-extensions" }, @@ -332,7 +332,7 @@ requires-dist = [ { name = "tensorflow", specifier = "==2.15.0" }, { name = "textblob", specifier = "==0.15.3" }, { 
name = "textual", specifier = ">=0.73.0" }, - { name = "toml" }, + { name = "tomli", specifier = ">=2.2.1" }, { name = "toolz", specifier = ">=0.8.2" }, { name = "tornado", specifier = "==6.4.1" }, { name = "typing-extensions", specifier = ">=4.7" }, From 9553151e18d547a51a3bfeffa3424dd62b9aee61 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 16:33:45 -0400 Subject: [PATCH 063/104] use built in tomllib instead Signed-off-by: Adrian Edwards --- .../libyear_util/pypi_parser.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py index 1bf56f202a..7b1d2b1f4a 100644 --- a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py +++ b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py @@ -1,6 +1,6 @@ import re, os import json -import tomli +import tomllib import logging import yaml @@ -93,15 +93,15 @@ def map_dependencies_pipfile(packages, type): #def parse_pipfile(file_handle): -# manifest = tomli.load(file_handle) +# manifest = tomllib.load(file_handle) # return map_dependencies_pipfile(manifest['packages'],'runtime') + #map_dependencies_pipfile(manifest['dev-packages'], 'develop') ## Erro handling Means that the parse_pipfile(...) old function is assuming the presence of a dev-packages key in the parsed Pipfile, but that key does not exist in some cases. def parse_pipfile(file_handle): - import tomli + import tomllib try: - manifest = tomli.load(file_handle) + manifest = tomllib.load(file_handle) except Exception as e: logging.warning(f"Failed to parse Pipfile: {getattr(file_handle, 'name', 'unknown')}, error: {e}") return [] @@ -154,8 +154,8 @@ def parse_setup_py(file_handle): def parse_poetry(file_handle, repo_id=None, path=None): file_name = getattr(file_handle, 'name', 'unknown') try: - manifest = tomli.load(file_handle) - except tomli.TomlDecodeError as e: + manifest = tomllib.load(file_handle) + except tomllib.TomlDecodeError as e: logging.warning(f"[Repo ID: {repo_id}] Skipping malformed TOML file: {file_name} at {path}, error: {e}") return [] except Exception as e: @@ -172,7 +172,7 @@ def parse_poetry(file_handle, repo_id=None, path=None): def parse_poetry_lock(file_handle): - manifest = tomli.load(file_handle) + manifest = tomllib.load(file_handle) deps = list() group = 'runtime' for package in manifest['package']: From 0f27edea91ce72fb1626abe8ccf4c983148b1ef5 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Thu, 23 Oct 2025 16:36:32 -0400 Subject: [PATCH 064/104] tomli only needed on older python versions since its part of the standard lib since 3.11 Signed-off-by: Adrian Edwards --- .../dependency_libyear_tasks/libyear_util/pypi_parser.py | 7 ++++++- pyproject.toml | 2 +- uv.lock | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py index 7b1d2b1f4a..5a99194ccf 100644 --- a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py +++ b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py @@ -1,6 +1,11 @@ import re, os import json -import tomllib +import sys +if sys.version_info >= (3, 11): + import tomllib +else: + import tomli as tomllib + import logging import yaml diff --git a/pyproject.toml b/pyproject.toml index 8f99591e75..8193867b81 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -84,7 +84,7 @@ dependencies = [ "tensorflow==2.15.0", "textblob==0.15.3", "textual>=0.73.0", - "tomli>=2.2.1", + "tomli>=2.2.1 ; python_full_version < '3.11'", "toolz>=0.8.2", "tornado==6.4.1", "typing-extensions>=4.7", diff --git a/uv.lock b/uv.lock index 1631362afb..daa844fd49 100644 --- a/uv.lock +++ b/uv.lock @@ -207,7 +207,7 @@ dependencies = [ { name = "tensorflow" }, { name = "textblob" }, { name = "textual" }, - { name = "tomli" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "toolz" }, { name = "tornado" }, { name = "typing-extensions" }, @@ -332,7 +332,7 @@ requires-dist = [ { name = "tensorflow", specifier = "==2.15.0" }, { name = "textblob", specifier = "==0.15.3" }, { name = "textual", specifier = ">=0.73.0" }, - { name = "tomli", specifier = ">=2.2.1" }, + { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.2.1" }, { name = "toolz", specifier = ">=0.8.2" }, { name = "tornado", specifier = "==6.4.1" }, { name = "typing-extensions", specifier = ">=4.7" }, From 1cab8cea7ddd07e311f3626ee6c06ee1fc489fbe Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 19 Nov 2025 16:14:53 -0500 Subject: [PATCH 065/104] remove import from within function Signed-off-by: Adrian Edwards --- .../git/dependency_libyear_tasks/libyear_util/pypi_parser.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py index 5a99194ccf..11b880e04c 100644 --- a/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py +++ b/augur/tasks/git/dependency_libyear_tasks/libyear_util/pypi_parser.py @@ -103,8 +103,6 @@ def map_dependencies_pipfile(packages, type): ## Erro handling Means that the parse_pipfile(...) old function is assuming the presence of a dev-packages key in the parsed Pipfile, but that key does not exist in some cases. 
def parse_pipfile(file_handle): - import tomllib - try: manifest = tomllib.load(file_handle) except Exception as e: From 59bd91826164333d38a419a95547d208669f2066 Mon Sep 17 00:00:00 2001 From: Shlok Gilda Date: Mon, 15 Dec 2025 01:09:50 -0500 Subject: [PATCH 066/104] Fix null target check in get_release_inf function Signed-off-by: Shlok Gilda --- augur/tasks/github/releases/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/releases/core.py b/augur/tasks/github/releases/core.py index 255b34cf89..f6b2f5e56b 100644 --- a/augur/tasks/github/releases/core.py +++ b/augur/tasks/github/releases/core.py @@ -38,7 +38,7 @@ def get_release_inf(repo_id, release, tag_only): 'tag_only': tag_only } else: - if 'tagger' in release['target']: + if release['target'] and 'tagger' in release['target']: tagger = release["target"]["tagger"] From 23df46e1deff9085da6ec051595464406e74e750 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 12:46:09 -0500 Subject: [PATCH 067/104] pass through follow_redirects parameter in hit_api so clients can change it Discovered by gpt5 via claude Signed-off-by: Adrian Edwards --- augur/tasks/github/util/github_paginator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/util/github_paginator.py b/augur/tasks/github/util/github_paginator.py index bd141d0c32..990bc4f738 100644 --- a/augur/tasks/github/util/github_paginator.py +++ b/augur/tasks/github/util/github_paginator.py @@ -9,7 +9,7 @@ from enum import Enum -def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, method: str = 'GET', ) -> Optional[httpx.Response]: +def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, method: str = 'GET', follow_redirects=True) -> Optional[httpx.Response]: """Ping the api and get the data back for the page. Returns: @@ -21,7 +21,7 @@ def hit_api(key_manager, url: str, logger: logging.Logger, timeout: float = 10, try: response = client.request( - method=method, url=url, auth=key_manager, timeout=timeout, follow_redirects=True) + method=method, url=url, auth=key_manager, timeout=timeout, follow_redirects=follow_redirects) except TimeoutError: logger.info(f"Request timed out. 
Sleeping {round(timeout)} seconds and trying again...\n") From a393e5b8782300f7ab5a90605e5e0e8cc84f6f4b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 12:46:43 -0500 Subject: [PATCH 068/104] dont follow redirects when checking github move Discovered by gpt5 via claude Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index b302a70a06..251abd3362 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -51,7 +51,7 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c attempts = 0 while attempts < 10: - response_from_gh = hit_api(key_auth, url, logger) + response_from_gh = hit_api(key_auth, url, logger, follow_redirects=False) if response_from_gh and response_from_gh.status_code != 404: break From 7d1f6f0c56724142ab667cedf144084bf2643318 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 13:18:35 -0500 Subject: [PATCH 069/104] avoid dangerous modification of sqalchemy internal representations when updating the DB Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 251abd3362..0542cb107e 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -21,9 +21,11 @@ def update_repo_with_dict(repo,new_dict,logger): logger: logging object db: db object """ - - to_insert = repo.__dict__ - del to_insert['_sa_instance_state'] + to_insert = {} + to_insert['repo_git'] = repo.repo_git + to_insert['repo_path'] = repo.repo_path + to_insert['repo_name'] = repo.repo_name + to_insert['description'] = repo.description to_insert.update(new_dict) result = bulk_insert_dicts(logger, to_insert, Repo, ['repo_id']) From 30a3b69e38c304010f007254c76dd07afe1d63da Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 13:19:38 -0500 Subject: [PATCH 070/104] perform timeout check before trying to access the response object Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 0542cb107e..69ff84d64b 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -60,6 +60,10 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c attempts += 1 + if attempts >= 10: + logger.error(f"Could not check if repo moved because the api timed out 10 times. Url: {url}") + raise Exception(f"ERROR: Could not get api response for repo: {url}") + #Update Url and retry if 301 #301 moved permanently if response_from_gh.status_code == 301: @@ -119,10 +123,6 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c session.commit() raise Exception("ERROR: Repo has moved, and there is no redirection! 404 returned, not 301. Resetting Collection!") - - if attempts >= 10: - logger.error(f"Could not check if repo moved because the api timed out 10 times. 
Url: {url}") - raise Exception(f"ERROR: Could not get api response for repo: {url}") #skip if not 404 logger.info(f"Repo found at url: {url}") From c2ef9651edb27141c46c7deef7cefdb914b89d6b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 13:19:57 -0500 Subject: [PATCH 071/104] replace wildcard import with importing the relevant objects Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 69ff84d64b..0abf14dc0a 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -1,5 +1,5 @@ from augur.tasks.github.util.github_task_session import * -from augur.application.db.models import * +from augur.application.db.models import Repo, CollectionStatus from augur.tasks.github.util.github_paginator import hit_api from augur.tasks.github.util.util import get_owner_repo from augur.tasks.github.util.util import parse_json_response From f13cddfe4e7b1a203c835154acef61945531d0fe Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 13:21:43 -0500 Subject: [PATCH 072/104] handle extreme edge case of a 301 redirect with no location field by throwing an exception Assisted-by: GPT5 via cursor Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 0abf14dc0a..8564459ee3 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -67,8 +67,12 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c #Update Url and retry if 301 #301 moved permanently if response_from_gh.status_code == 301: + redirect_location = response_from_gh.headers.get('location') or response_from_gh.headers.get('Location') + if not redirect_location: + logger.error(f"Could not check if repo moved because the redirect location is not present. Url: {url}") + raise Exception(f"ERROR: Could not get redirect location for repo: {url}") - owner, name = extract_owner_and_repo_from_endpoint(key_auth, response_from_gh.headers['location'], logger) + owner, name = extract_owner_and_repo_from_endpoint(key_auth, redirect_location, logger) try: old_description = str(repo.description) From cd9b090109b886ece4d503195b771ac33cca8eca Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 13:22:16 -0500 Subject: [PATCH 073/104] stop retrying the request if any response codes from github are received. 
Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 8564459ee3..3fb4ad3591 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -55,7 +55,7 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c while attempts < 10: response_from_gh = hit_api(key_auth, url, logger, follow_redirects=False) - if response_from_gh and response_from_gh.status_code != 404: + if response_from_gh: break attempts += 1 From 4d909d339575356098b7cf97329258b31f2d5adf Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 11 Nov 2025 14:49:43 -0500 Subject: [PATCH 074/104] add missing repo_id value Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 3fb4ad3591..4938c9fa75 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -22,6 +22,7 @@ def update_repo_with_dict(repo,new_dict,logger): db: db object """ to_insert = {} + to_insert['repo_id'] = repo.repo_id # this is here because its needed as a unique key for bulk_insert_dicts to_insert['repo_git'] = repo.repo_git to_insert['repo_path'] = repo.repo_path to_insert['repo_name'] = repo.repo_name From 62926f01aee8acf8c08f3db611aa25e76d8ac03e Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 17 Nov 2025 11:59:34 -0500 Subject: [PATCH 075/104] ok turns out the limited dict stuff broke and is causing nulls in the db Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 4938c9fa75..8e57762400 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -21,12 +21,8 @@ def update_repo_with_dict(repo,new_dict,logger): logger: logging object db: db object """ - to_insert = {} - to_insert['repo_id'] = repo.repo_id # this is here because its needed as a unique key for bulk_insert_dicts - to_insert['repo_git'] = repo.repo_git - to_insert['repo_path'] = repo.repo_path - to_insert['repo_name'] = repo.repo_name - to_insert['description'] = repo.description + to_insert = dict(repo.__dict__) + del to_insert['_sa_instance_state'] to_insert.update(new_dict) result = bulk_insert_dicts(logger, to_insert, Repo, ['repo_id']) From 14eb94337fe3c4c7659510506083969c80ab8ca0 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 19 Nov 2025 14:14:31 -0500 Subject: [PATCH 076/104] use custom exception types to bubble the exceptions up a level and catch them to re-emit celery exceptions. 
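
Reduced to a self-contained sketch, the pattern is: raise domain-specific exceptions from the collection code, then translate them into Celery control-flow exceptions at the task boundary. The exception names below match the ones added to core.py in this patch; check_repo is only a hypothetical stand-in for ping_github_for_repo_move:

    from celery.exceptions import Retry, Reject

    class RepoMovedException(Exception):
        """Collection-level signal: the repository answered with a redirect."""

    class RepoGoneException(Exception):
        """Collection-level signal: the repository is gone (404 with no redirect)."""

    def check_repo(url):
        # stand-in for ping_github_for_repo_move; it only raises the domain exceptions
        raise RepoMovedException(f"{url} has moved")

    def detect_move_task(url):
        # task boundary: re-emit the domain exceptions as Celery exceptions,
        # mirroring the try/except added to tasks.py below
        try:
            check_repo(url)
        except RepoMovedException as e:
            raise Retry(e)
        except RepoGoneException as e:
            raise Reject(e)
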
Signed-off-by: Adrian Edwards --- augur/tasks/github/detect_move/core.py | 10 ++++++++-- augur/tasks/github/detect_move/tasks.py | 11 +++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py index 8e57762400..6b47df1a32 100644 --- a/augur/tasks/github/detect_move/core.py +++ b/augur/tasks/github/detect_move/core.py @@ -9,6 +9,12 @@ from augur.application.db.lib import bulk_insert_dicts +class RepoMovedException(Exception): + pass + +class RepoGoneException(Exception): + pass + def update_repo_with_dict(repo,new_dict,logger): """ @@ -86,7 +92,7 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c update_repo_with_dict(repo, repo_update_dict, logger) - raise Exception("ERROR: Repo has moved! Resetting Collection!") + raise RepoMovedException("ERROR: Repo has moved! Resetting Collection!") #Mark as ignore if 404 if response_from_gh.status_code == 404: @@ -122,7 +128,7 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c session.commit() - raise Exception("ERROR: Repo has moved, and there is no redirection! 404 returned, not 301. Resetting Collection!") + raise RepoGoneException("ERROR: Repo has moved, and there is no redirection! 404 returned, not 301. Resetting Collection!") #skip if not 404 diff --git a/augur/tasks/github/detect_move/tasks.py b/augur/tasks/github/detect_move/tasks.py index f542d89289..6f7b04b8de 100644 --- a/augur/tasks/github/detect_move/tasks.py +++ b/augur/tasks/github/detect_move/tasks.py @@ -1,11 +1,13 @@ import logging -from augur.tasks.github.detect_move.core import * +from augur.tasks.github.detect_move.core import ping_github_for_repo_move, RepoMovedException, RepoGoneException from augur.tasks.init.celery_app import celery_app as celery from augur.tasks.init.celery_app import AugurCoreRepoCollectionTask, AugurSecondaryRepoCollectionTask from augur.application.db.lib import get_repo_by_repo_git, get_session from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth +from celery.exceptions import Retry, Reject + @celery.task(base=AugurCoreRepoCollectionTask) def detect_github_repo_move_core(repo_git : str) -> None: @@ -24,7 +26,12 @@ def detect_github_repo_move_core(repo_git : str) -> None: #Ping each repo with the given repo_git to make sure #that they are still in place. 
- ping_github_for_repo_move(session, key_auth, repo, logger) + try: + ping_github_for_repo_move(session, key_auth, repo, logger) + except RepoMovedException as e: + raise Retry(e) + except RepoGoneException as e: + raise Reject(e) @celery.task(base=AugurSecondaryRepoCollectionTask) From 629da3a8ae2fae8d798731621049b51e1520f5a6 Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Fri, 19 Dec 2025 00:39:45 +0530 Subject: [PATCH 077/104] Fix: Updated link of redis windows installation Signed-off-by: PredictiveManish --- docs/source/getting-started/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index 41bc1be4dc..7f270e3f0f 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -48,7 +48,7 @@ Caching System (Redis) ---------------------- * `Linux Installation `__ * `Mac Installation `__ -* `Windows Installation `__ +* `Windows Installation `__ Message Broker (RabbitMQ) ------------------------- From ffd2505980707811e6a5fa73c8de9876fa650b0b Mon Sep 17 00:00:00 2001 From: PredictiveManish Date: Fri, 19 Dec 2025 00:43:10 +0530 Subject: [PATCH 078/104] fix: Updated links for Redis Installation Signed-off-by: PredictiveManish --- docs/source/getting-started/installation.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index 7f270e3f0f..e7dce6ffbe 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -46,8 +46,8 @@ The ``message_insights_worker`` uses a system-level package called OpenMP. You w Caching System (Redis) ---------------------- -* `Linux Installation `__ -* `Mac Installation `__ +* `Linux Installation `__ +* `Mac Installation `__ * `Windows Installation `__ Message Broker (RabbitMQ) From d410b3b9ccab25362fd3f608828651052fd6476d Mon Sep 17 00:00:00 2001 From: Kushagra Date: Fri, 12 Sep 2025 15:37:02 +0530 Subject: [PATCH 079/104] Fix broken 8knot link in README Signed-off-by: Kushagra Signed-off-by: Adrian Edwards --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 16bd88f2e5..883b28d982 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Augur is now releasing a dramatically improved new version. It is also available - A new job management architecture that uses Celery and Redis to manage queues, and enables users to run a Flower job monitoring dashboard. - Materialized views to increase the snappiness of API’s and Frontends on large scale data. - Changes to primary keys, which now employ a UUID strategy that ensures unique keys across all Augur instances. - - Support for [8knot](https://github.com/oss-aspen/8kno) dashboards (view a sample [here](https://eightknot.osci.io/)). + - Support for [8knot](https://github.com/oss-aspen/8knot) dashboards (view a sample [here](https://eightknot.osci.io/)). *beautification coming soon!* - Data collection completeness assurance enabled by a structured, relational data set that is easily compared with platform API Endpoints. - The next release of the new version will include a hosted version of Augur where anyone can create an account and add repos *they care about*. 
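
Taken together, the detect_move changes in PATCH 068 through PATCH 073 above reshape how the repo-move check talks to the GitHub API: redirects are no longer followed so a 301 stays visible, the Location header is read defensively, a 404 marks the repo as gone, and only a completely missing response triggers a retry. A condensed illustration of that request handling, written against the requests library purely for this sketch (Augur itself goes through its hit_api helper, so the HTTP details here are an assumption of the example, not the project's code):

    import requests

    def probe_repo(api_url):
        """Return ("moved", new_location), ("gone", None), or ("ok", None) for a repo API URL."""
        response = requests.get(api_url, allow_redirects=False)  # keep a 301 visible instead of following it
        if response.status_code == 301:
            # requests exposes headers case-insensitively, but a missing Location is still possible
            location = response.headers.get("location")
            if not location:
                raise RuntimeError(f"301 from {api_url} without a Location header")
            return "moved", location
        if response.status_code == 404:
            return "gone", None
        return "ok", None

    # usage (requires network access):
    # status, new_url = probe_repo("https://api.github.com/repos/chaoss/augur")
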
From 49af484d288ed33b95a483b7436de1e6a4bae0c6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 19 Dec 2025 14:07:46 -0500 Subject: [PATCH 080/104] update Code of Conduct link Signed-off-by: Adrian Edwards --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 883b28d982..e59180de0c 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ If you get stuck, please feel free to [ask for help](https://github.com/chaoss/a ## Contributing -To contribute to Augur, please follow the guidelines found in our [CONTRIBUTING.md](CONTRIBUTING.md) and our [Code of Conduct](CODE_OF_CONDUCT.md). Augur is a welcoming community that is open to all, regardless if you're working on your 1000th contribution to open source or your 1st. +To contribute to Augur, please follow the guidelines found in our [CONTRIBUTING.md](CONTRIBUTING.md) and the CHAOSS [Code of Conduct]([CODE_OF_CONDUCT.md](https://github.com/chaoss/.github/blob/main/CODE_OF_CONDUCT.md)). Augur is a welcoming community that is open to all, regardless if you're working on your 1000th contribution to open source or your 1st. We strongly believe that much of what makes open source so great is the incredible communities it brings together, so we invite you to join us! ## License, Copyright, and Funding From 3b6f57572721cdb78f546d3da9b8a8eedd7996e7 Mon Sep 17 00:00:00 2001 From: Pratyksh Gupta Date: Sat, 27 Dec 2025 23:16:20 +0530 Subject: [PATCH 081/104] Fix #3474: Add default value for AUGUR_DOCKER_DEPLOY to prevent AttributeError on bare metal installs Signed-off-by: Pratyksh Gupta --- augur/api/gunicorn_conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/api/gunicorn_conf.py b/augur/api/gunicorn_conf.py index dd1bfc6961..6586b6f69a 100644 --- a/augur/api/gunicorn_conf.py +++ b/augur/api/gunicorn_conf.py @@ -40,7 +40,7 @@ # set the log location for gunicorn logs_directory = get_value('Logging', 'logs_directory') -is_docker = os.getenv("AUGUR_DOCKER_DEPLOY").lower() in ('true', '1', 't', 'y', 'yes') +is_docker = os.getenv("AUGUR_DOCKER_DEPLOY", 'False').lower() in ('true', '1', 't', 'y', 'yes') accesslog = f"{logs_directory}/gunicorn.log" errorlog = f"{logs_directory}/gunicorn.log" From 0156030f2bb76c4d8b76cc21a295cb749f519b90 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:01:48 -0500 Subject: [PATCH 082/104] allow config sources in the config class to be overridden via a parameter and skip the db stuff Signed-off-by: Adrian Edwards --- augur/application/config.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index a3fe00a2e0..5a42231484 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -143,24 +143,28 @@ def base_config(self): return config - def __init__(self, logger, session: DatabaseSession): + def __init__(self, logger, session: DatabaseSession, config_sources: list = None): self.session = session self.logger = logger self.accepted_types = ["str", "bool", "int", "float", "NoneType"] - # list items in order of precedence. lowest precedence (i.e. fallback) values first - self.config_sources = [ - JsonConfig(default_config, logger) - ] + if not config_sources: + # list items in order of precedence. lowest precedence (i.e. 
fallback) values first + config_sources = [ + JsonConfig(default_config, logger) + ] - config_dir = Path(os.getenv("CONFIG_DATADIR", "./")) - config_path = config_dir.joinpath("augur.json") - if config_path.exists(): - self.config_sources.append(JsonConfig(json.loads(config_path.read_text(encoding="UTF-8")), logger)) + config_dir = Path(os.getenv("CONFIG_DATADIR", "./")) + config_path = config_dir.joinpath("augur.json") + if config_path.exists(): + config_sources.append(JsonConfig(json.loads(config_path.read_text(encoding="UTF-8")), logger)) + + config_sources.append( DatabaseConfig(session, logger) ) + + self.config_sources = config_sources - self.config_sources.append( DatabaseConfig(session, logger) ) def _get_writable_source(self) -> 'ConfigStore': """Returns the highest precedence source that can be written to. From bcbe9e8472d1d2d720fec25a544732ac710687bb Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:05:48 -0500 Subject: [PATCH 083/104] Write unit test demonstrating the problem Signed-off-by: Adrian Edwards --- tests/test_classes/test_config_stores.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index 69fe19017f..30fd09f541 100644 --- a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -109,3 +109,23 @@ def test_fetching_real_defaults(mock_logger, mock_session): cfg.config_sources = [JsonConfig(default_config, mock_logger)] assert cfg.get_value("Redis", "cache_group") == 0 + + +def test_get_section_incorporates_hierarchy(): + + default_dict = { + "Section1": {"alpha": 1, "beta": "x"}, + "Section2": {"gamma": False, "delta": 3.14}, + } + + override_dict = { + "Section1": {"beta": "y"}, + "Section2": {"gamma": False, "delta": 3.14}, + } + + cfg = AugurConfig(None, None, [JsonConfig(default_dict, mock_logger), JsonConfig(override_dict, mock_logger)]) + + expected_dict = {"alpha": 1, "beta": "y"} + + assert cfg.get_section("Section1") == expected_dict + From 6dbb3bf62bef35a6eb3dfef49b0d77ef8b9de396 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:39:42 -0500 Subject: [PATCH 084/104] modify the test to utilize load_config, since thats whats relied on for the merging functionality Signed-off-by: Adrian Edwards --- tests/test_classes/test_config_stores.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index 30fd09f541..b55275cfb7 100644 --- a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -111,6 +111,30 @@ def test_fetching_real_defaults(mock_logger, mock_session): assert cfg.get_value("Redis", "cache_group") == 0 +def test_load_config_utilizes_hierarchy(): + + default_dict = { + "Section1": {"alpha": 1, "beta": "x"}, + "Section2": {"gamma": False, "delta": 3.14}, + } + + override_dict = { + "Section1": {"beta": "y"}, + "Section2": {"Epsilon": True, "delta": 6.28}, + "Section3": {"hi": "there"} + } + + cfg = AugurConfig(None, None, [JsonConfig(default_dict, mock_logger), JsonConfig(override_dict, mock_logger)]) + + expected_dict = { + "Section1": {"alpha": 1, "beta": "y"}, + "Section2": {"gamma": False, "Epsilon": True, "delta": 6.28}, + "Section3": {"hi": "there"} # test that new sections are accounted for too + } + + assert cfg.load_config() == expected_dict + + def test_get_section_incorporates_hierarchy(): default_dict = { From 
a7b2cd9c836e5dd6d7a183fe0e86d74e2c6a33e6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:41:54 -0500 Subject: [PATCH 085/104] Fix the issue Signed-off-by: Adrian Edwards --- augur/application/config.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/augur/application/config.py b/augur/application/config.py index 5a42231484..ded2f419cc 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -234,10 +234,36 @@ def load_config(self) -> dict: Returns: The config from all sources """ + + def merge(a: dict, b: dict, path=[]): + """Do a deep merge of two python dictionaries (standard library update and merge dont do this) + This is what allows updated values in higher priority config sources to take precedence. + + This function is lightly modified from https://stackoverflow.com/a/7205107 + + Args: + a (dict): The dict to merge into. Will be mutated + b (dict): The incoming dict to merge in. Data in this dict will take precedence when there is a conflict + path (list, optional): Keeps track of the path during the recursion process. Not intended for use by consumers. Defaults to []. + + Returns: + dict: The dict passed in via parameter a, now modified with the new values + """ + for key in b: + if key in a: + if isinstance(a[key], dict) and isinstance(b[key], dict): + merge(a[key], b[key], path + [str(key)]) + elif a[key] != b[key]: + # raise Exception('Conflict at ' + '.'.join(path + [str(key)])) + a[key] = b[key] + else: + a[key] = b[key] + return a + config = {} for config_source in self.config_sources: - config.update(config_source.retrieve_dict()) + merge(config, config_source.retrieve_dict()) return config From 7dc00820adaa72c9ec70c223c91f2bad0c732228 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:42:10 -0500 Subject: [PATCH 086/104] improve how JsonSource's identify themselves in the logs Signed-off-by: Adrian Edwards --- augur/application/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index ded2f419cc..8c5b452eb5 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -624,6 +624,9 @@ def get_value(self, section_name: str, value_key: str): return self.json_data[section_name].get(value_key, None) + def __repr__(self): + return f"JsonSource({self.json_data})" + class DatabaseConfig(ConfigStore): From dfa43c851ad143b15b55a97e8c0231f0a346f53a Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:47:18 -0500 Subject: [PATCH 087/104] prevent accidental modification of JSON config values from externally Signed-off-by: Adrian Edwards --- augur/application/config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index 8c5b452eb5..1f91a3ef0a 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -1,6 +1,7 @@ import sqlalchemy as s from sqlalchemy import and_, update import json +import copy from typing import List, Any, Optional import os from augur.application.db.models import Config @@ -559,6 +560,11 @@ def load_dict(self, data: dict, ignore_existing=False): self.json_data.update(data) def retrieve_dict(self): + # if this dict isnt supposed to be mutable, we need to make a copy + # this prevents being able to change data in this object by reference + + if not self.writable: + return copy.deepcopy(self.json_data) return self.json_data def clear(self): From 
b5a126dbca93504ad010b06f67bd280d6af78e0d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 6 Jan 2026 13:58:38 -0500 Subject: [PATCH 088/104] add docs for the init parameters Signed-off-by: Adrian Edwards --- augur/application/config.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index 1f91a3ef0a..d6447c7bdf 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -145,6 +145,14 @@ def base_config(self): return config def __init__(self, logger, session: DatabaseSession, config_sources: list = None): + """Create a new AugurConfig class + + Args: + logger (_type_): The logger instance to use for logging + session (DatabaseSession): a connection to the database for configuring the database source. + config_sources (list, optional): An alternative way to pass in config sources. Used for unit testing only. + Specifying a value here enables you to supply `None` to the `session` argument, since it will be unused. Defaults to None. + """ self.session = session self.logger = logger From 155186f5f0c373907a63703f64d705247e7c46ca Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Tue, 6 Jan 2026 15:52:42 -0800 Subject: [PATCH 089/104] remove path tracking Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- augur/application/config.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/augur/application/config.py b/augur/application/config.py index d6447c7bdf..41a7290200 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -244,7 +244,7 @@ def load_config(self) -> dict: The config from all sources """ - def merge(a: dict, b: dict, path=[]): + def merge(a: dict, b: dict): """Do a deep merge of two python dictionaries (standard library update and merge dont do this) This is what allows updated values in higher priority config sources to take precedence. @@ -253,7 +253,6 @@ def merge(a: dict, b: dict, path=[]): Args: a (dict): The dict to merge into. Will be mutated b (dict): The incoming dict to merge in. Data in this dict will take precedence when there is a conflict - path (list, optional): Keeps track of the path during the recursion process. Not intended for use by consumers. Defaults to []. 
Returns: dict: The dict passed in via parameter a, now modified with the new values @@ -261,9 +260,8 @@ def merge(a: dict, b: dict, path=[]): for key in b: if key in a: if isinstance(a[key], dict) and isinstance(b[key], dict): - merge(a[key], b[key], path + [str(key)]) + merge(a[key], b[key]) elif a[key] != b[key]: - # raise Exception('Conflict at ' + '.'.join(path + [str(key)])) a[key] = b[key] else: a[key] = b[key] From 64ad1a5c0f54e94e4f5b5e84633cc777f9207680 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 7 Jan 2026 09:46:36 -0500 Subject: [PATCH 090/104] fix test for retrieving the correct dict Signed-off-by: Adrian Edwards --- tests/test_classes/test_config_stores.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index b55275cfb7..d6d1fb81a0 100644 --- a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -30,7 +30,7 @@ def test_jsonconfig_retrieve_has_get(mock_logger): cfg = JsonConfig(data, mock_logger) # retrieve full dict - assert cfg.retrieve_dict() is data + assert cfg.retrieve_dict() == data # has/get section assert cfg.has_section("Alpha") is True From b5920e1da2521e7fc0f7bcbeaae559936d937473 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 7 Jan 2026 09:51:33 -0500 Subject: [PATCH 091/104] add and fix test case for verifying write protection for the JSON config Signed-off-by: Adrian Edwards --- augur/application/config.py | 2 ++ tests/test_classes/test_config_stores.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/augur/application/config.py b/augur/application/config.py index 41a7290200..f46f6dc278 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -546,6 +546,8 @@ class JsonConfig(ConfigStore): def __init__(self, json_data, logger: logging.Logger): super().__init__(logger) + if not self.writable: + json_data = copy.deepcopy(json_data) self.json_data = json_data @property diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index d6d1fb81a0..003f19431d 100644 --- a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -25,6 +25,25 @@ def test_jsonconfig_empty_true_false(mock_logger): assert JsonConfig({"A": {}}, mock_logger).empty is False +def test_jsonconfig_write_protection(mock_logger): + # JsonConfig should be not writeable by default, so we should be unable to change + # its values, even by abusing references + + data = {"Alpha": {"a": 1, "b": "str"}, "Beta": {}} + cfg = JsonConfig(data, mock_logger) + + # mutation via input + data["Alpha"]["a"] = 2 + + config_test = cfg.retrieve_dict() + assert config_test != data # the data in the config should not change + + # mutation via output + config_test["Alpha"]["a"] = 3 + + config_test = cfg.retrieve_dict() + assert config_test != data # the data in the config should not change + def test_jsonconfig_retrieve_has_get(mock_logger): data = {"Alpha": {"a": 1, "b": "str"}, "Beta": {}} cfg = JsonConfig(data, mock_logger) From 0fb462a388f588280f87d58644b3e86d1122bd88 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 19 Nov 2025 17:15:44 -0500 Subject: [PATCH 092/104] add CI job for running the unit tests with pytest using pytest avoids two layers of python environment "the gap between task runners like tox and test runners like pytest is narrower now" - Gemini Signed-off-by: Adrian Edwards --- .github/workflows/functional_test.yml | 30 
+++++++++++++++++++++++++++ pyproject.toml | 13 ++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 .github/workflows/functional_test.yml diff --git a/.github/workflows/functional_test.yml b/.github/workflows/functional_test.yml new file mode 100644 index 0000000000..06e3af0ef1 --- /dev/null +++ b/.github/workflows/functional_test.yml @@ -0,0 +1,30 @@ +name: "Functional tests" +# Runs automated test suites that ensure functionality is preserved. Any failures should prevent code from shipping. +on: + pull_request: + branches: [main, release] + +permissions: + contents: read + +jobs: + test: + name: test with ${{ matrix.env }} on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + env: ["3.10", "3.11", "3.12", "3.13", "3.14"] + os: [ubuntu-latest, macos-latest] + steps: + - uses: actions/checkout@v5 + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + enable-cache: true + + - name: Run Tests + run: | + uv run --python ${{ matrix.env }} pytest \ + tests/test_classes \ + --color=yes \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8193867b81..064b5e7bdd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -187,6 +187,19 @@ legacy_tox_ini = """ addopts = -ra -s """ +[tool.pytest.ini_options] +addopts = "-ra -s" +testpaths = [ + "tests/test_classes", + # "tests/test_routes", # runs, but needs a fixture for connecting to the web interface of Augur + # "tests/test_metrics", + # "tests/test_tasks", + # "tests/test_application", + # "tests/test_workers", + # "tests/test_workers/worker_persistence/", + # "tests/test_routes/runner.py" +] + [tool.mypy] files = ['augur/application/db/*.py'] ignore_missing_imports = true From 76b7ceb11304189802ae75587d7ebe43e01b98a6 Mon Sep 17 00:00:00 2001 From: pushpit kamboj Date: Fri, 9 Jan 2026 08:36:14 +0530 Subject: [PATCH 093/104] (fix): remove no else raise and no else return rules from .pylintrc Signed-off-by: pushpit kamboj --- .pylintrc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pylintrc b/.pylintrc index c319333602..f18952423d 100644 --- a/.pylintrc +++ b/.pylintrc @@ -142,7 +142,7 @@ confidence=HIGH, # Only enable specific messages disable=all -enable=unused-import,redefined-outer-name,E1206,E1205,E0704,E0107,E4702,E1101,E0211,E0213,E0103,E1133,E1120,E3102,E0602,E1123,E0001,W0702,W1404,W0706,W0101,W0120,W0718,R1737,R1705,R1720,R1724,R1723,R0401,R1701,C1802,C0200,C0501,C0201,W1001,E1102,R0923 +enable=unused-import,redefined-outer-name,E1206,E1205,E0704,E0107,E4702,E1101,E0211,E0213,E0103,E1133,E1120,E3102,E0602,E1123,E0001,W0702,W1404,W0706,W0101,W0120,W0718,R1737,R1724,R1723,R0401,R1701,C1802,C0200,C0501,C0201,W1001,E1102,R0923 [LOGGING] From 53eed1a9213af447bb6cd6b380a701fa963dd4b7 Mon Sep 17 00:00:00 2001 From: iGufrankhan Date: Sat, 10 Jan 2026 00:33:11 +0000 Subject: [PATCH 094/104] Remove stale explorer_libyear_detail refresh Signed-off-by: iGufrankhan --- scripts/control/refresh-matviews.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/control/refresh-matviews.sh b/scripts/control/refresh-matviews.sh index 576466b2df..1d1756031d 100644 --- a/scripts/control/refresh-matviews.sh +++ b/scripts/control/refresh-matviews.sh @@ -6,6 +6,5 @@ psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.augur_new_contributors with data;' psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW 
augur_data.explorer_contributor_actions with data;' psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_libyear_all with data;' -psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_libyear_detail with data;' psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_new_contributors with data;' psql -U augur -h localhost -p 5432 -d padres -c 'REFRESH MATERIALIZED VIEW augur_data.explorer_entry_list with data;' \ No newline at end of file From 22f81b56a81cd7e4dc04938df7fcf977fe95fc9a Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Tue, 6 Jan 2026 17:13:14 -0800 Subject: [PATCH 095/104] Revert database url retrieval so bare metal works Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- augur/application/schema/alembic/env.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index bf2993c4b1..5b00c4a80d 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -3,6 +3,7 @@ from alembic import context from augur.application.db.models.base import Base +from augur.application.db.engine import get_database_string from sqlalchemy import create_engine from dotenv import load_dotenv import os @@ -33,7 +34,7 @@ # possibly swap sqlalchemy.url with AUGUR_DB env var too -sqlalchemy_url = os.getenv("AUGUR_DB") or config.get_main_option("sqlalchemy.url") +sqlalchemy_url = get_database_string() VERSIONS_DIR = Path(__file__).parent / "versions" From 41059062a76ba183a32ae84617395098255a35c6 Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Tue, 6 Jan 2026 17:21:35 -0800 Subject: [PATCH 096/104] unused os import Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- augur/application/schema/alembic/env.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/augur/application/schema/alembic/env.py b/augur/application/schema/alembic/env.py index 5b00c4a80d..3ae3afdb53 100644 --- a/augur/application/schema/alembic/env.py +++ b/augur/application/schema/alembic/env.py @@ -6,7 +6,6 @@ from augur.application.db.engine import get_database_string from sqlalchemy import create_engine from dotenv import load_dotenv -import os import re from pathlib import Path @@ -32,11 +31,8 @@ # my_important_option = config.get_main_option("my_important_option") # ... etc. 
-# possibly swap sqlalchemy.url with AUGUR_DB env var too - sqlalchemy_url = get_database_string() - VERSIONS_DIR = Path(__file__).parent / "versions" def _next_int_rev() -> str: From 43aaf92aac4d9a1688c847aab05b886e79e0abf3 Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Fri, 9 Jan 2026 22:37:42 -0500 Subject: [PATCH 097/104] Disable tests for 3.12+ so they work Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- .github/workflows/functional_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/functional_test.yml b/.github/workflows/functional_test.yml index 06e3af0ef1..544029df0e 100644 --- a/.github/workflows/functional_test.yml +++ b/.github/workflows/functional_test.yml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - env: ["3.10", "3.11", "3.12", "3.13", "3.14"] + env: ["3.10", "3.11"] #, "3.12", "3.13", "3.14" os: [ubuntu-latest, macos-latest] steps: - uses: actions/checkout@v5 @@ -27,4 +27,4 @@ jobs: run: | uv run --python ${{ matrix.env }} pytest \ tests/test_classes \ - --color=yes \ No newline at end of file + --color=yes From 4e2d0143043d2cd341f0a96baa20f0836dfbf68d Mon Sep 17 00:00:00 2001 From: Noaman-Akhtar Date: Sat, 10 Jan 2026 22:42:46 +0000 Subject: [PATCH 098/104] Deleted the augur-retired-sql.schema file Signed-off-by: Noaman-Akhtar --- docker/database/augur-retired-sql.schema | 10330 --------------------- 1 file changed, 10330 deletions(-) delete mode 100644 docker/database/augur-retired-sql.schema diff --git a/docker/database/augur-retired-sql.schema b/docker/database/augur-retired-sql.schema deleted file mode 100644 index ac3872a1d5..0000000000 --- a/docker/database/augur-retired-sql.schema +++ /dev/null @@ -1,10330 +0,0 @@ --- --- PostgreSQL database dump --- - --- Dumped from database version 12.11 (Ubuntu 12.11-0ubuntu0.20.04.1) --- Dumped by pg_dump version 12.11 (Ubuntu 12.11-0ubuntu0.20.04.1) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - --- --- Name: augur_data; Type: SCHEMA; Schema: -; Owner: augur --- - -CREATE SCHEMA augur_data; - - -ALTER SCHEMA augur_data OWNER TO augur; - --- --- Name: augur_operations; Type: SCHEMA; Schema: -; Owner: augur --- - -CREATE SCHEMA augur_operations; - - -ALTER SCHEMA augur_operations OWNER TO augur; - --- --- Name: spdx; Type: SCHEMA; Schema: -; Owner: augur --- - -CREATE SCHEMA spdx; - - -ALTER SCHEMA spdx OWNER TO augur; - --- --- Name: toss_specific; Type: SCHEMA; Schema: -; Owner: augur --- - -CREATE SCHEMA toss_specific; - - -ALTER SCHEMA toss_specific OWNER TO augur; - --- --- Name: refresh_aggregates(); Type: PROCEDURE; Schema: augur_data; Owner: augur --- - -CREATE PROCEDURE augur_data.refresh_aggregates() - LANGUAGE plpgsql - AS $$ - begin - perform pg_advisory_lock(124); - execute 'REFRESH MATERIALIZED VIEW "augur_data"."issue_reporter_created_at"'; - perform pg_advisory_unlock(124); - end; -$$; - - -ALTER PROCEDURE augur_data.refresh_aggregates() OWNER TO augur; - --- --- Name: create_constraint_if_not_exists(text, text, text); Type: FUNCTION; Schema: public; Owner: augur --- - -CREATE FUNCTION public.create_constraint_if_not_exists(t_name text, c_name text, constraint_sql text) 
RETURNS void - LANGUAGE plpgsql - AS $$ - BEGIN - -- Look for our constraint - IF NOT EXISTS (SELECT constraint_name - FROM information_schema.constraint_column_usage - WHERE constraint_name = c_name) THEN - EXECUTE 'ALTER TABLE ' || t_name || ' ADD CONSTRAINT ' || c_name || ' ' || constraint_sql; - END IF; - END; -$$; - - -ALTER FUNCTION public.create_constraint_if_not_exists(t_name text, c_name text, constraint_sql text) OWNER TO augur; - --- --- Name: pc_chartoint(character varying); Type: FUNCTION; Schema: public; Owner: augur --- - -CREATE FUNCTION public.pc_chartoint(chartoconvert character varying) RETURNS integer - LANGUAGE sql IMMUTABLE STRICT - AS $_$ -SELECT CASE WHEN trim($1) SIMILAR TO '[0-9]+' - THEN CAST(trim($1) AS integer) - ELSE NULL END; - -$_$; - - -ALTER FUNCTION public.pc_chartoint(chartoconvert character varying) OWNER TO augur; - --- --- Name: refresh_aggregates(); Type: PROCEDURE; Schema: public; Owner: augur --- - -CREATE PROCEDURE public.refresh_aggregates() - LANGUAGE plpgsql - AS $$ - begin - perform pg_advisory_lock(124); - execute 'REFRESH MATERIALIZED VIEW "augur_data"."issue_reporter_created_at"'; - perform pg_advisory_unlock(124); - end; -$$; - - -ALTER PROCEDURE public.refresh_aggregates() OWNER TO augur; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: analysis_log; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.analysis_log ( - repos_id integer NOT NULL, - status character varying NOT NULL, - date_attempted timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_data.analysis_log OWNER TO augur; - --- --- Name: augur_data.repo_insights_ri_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data."augur_data.repo_insights_ri_id_seq" - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data."augur_data.repo_insights_ri_id_seq" OWNER TO augur; - --- --- Name: chaoss_metric_status_cms_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.chaoss_metric_status_cms_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.chaoss_metric_status_cms_id_seq OWNER TO augur; - --- --- Name: chaoss_metric_status; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.chaoss_metric_status ( - cms_id bigint DEFAULT nextval('augur_data.chaoss_metric_status_cms_id_seq'::regclass) NOT NULL, - cm_group character varying, - cm_source character varying, - cm_type character varying, - cm_backend_status character varying, - cm_frontend_status character varying, - cm_defined boolean, - cm_api_endpoint_repo character varying, - cm_api_endpoint_rg character varying, - cm_name character varying, - cm_working_group character varying, - cm_info json, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cm_working_group_focus_area character varying -); - - -ALTER TABLE augur_data.chaoss_metric_status OWNER TO augur; - --- --- Name: TABLE chaoss_metric_status; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.chaoss_metric_status IS 'This table used to track CHAOSS Metric implementations in Augur, but due to the constantly changing location of that information, it is for the moment not actively populated. 
'; - - --- --- Name: chaoss_user; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.chaoss_user ( - chaoss_id bigint NOT NULL, - chaoss_login_name character varying, - chaoss_login_hashword character varying, - chaoss_email character varying, - chaoss_text_phone character varying, - chaoss_first_name character varying, - chaoss_last_name character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) with time zone DEFAULT now() -); - - -ALTER TABLE augur_data.chaoss_user OWNER TO augur; - --- --- Name: chaoss_user_chaoss_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.chaoss_user_chaoss_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.chaoss_user_chaoss_id_seq OWNER TO augur; - --- --- Name: chaoss_user_chaoss_id_seq; Type: SEQUENCE OWNED BY; Schema: augur_data; Owner: augur --- - -ALTER SEQUENCE augur_data.chaoss_user_chaoss_id_seq OWNED BY augur_data.chaoss_user.chaoss_id; - - --- --- Name: commit_comment_ref_cmt_comment_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.commit_comment_ref_cmt_comment_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.commit_comment_ref_cmt_comment_id_seq OWNER TO augur; - --- --- Name: commit_comment_ref; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.commit_comment_ref ( - cmt_comment_id bigint DEFAULT nextval('augur_data.commit_comment_ref_cmt_comment_id_seq'::regclass) NOT NULL, - cmt_id bigint NOT NULL, - repo_id bigint, - msg_id bigint NOT NULL, - user_id bigint NOT NULL, - body text, - line bigint, - "position" bigint, - commit_comment_src_node_id character varying, - cmt_comment_src_id bigint NOT NULL, - created_at timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.commit_comment_ref OWNER TO augur; - --- --- Name: COLUMN commit_comment_ref.commit_comment_src_node_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.commit_comment_ref.commit_comment_src_node_id IS 'For data provenance, we store the source node ID if it exists. '; - - --- --- Name: COLUMN commit_comment_ref.cmt_comment_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.commit_comment_ref.cmt_comment_src_id IS 'For data provenance, we store the source ID if it exists. 
'; - - --- --- Name: commit_parents_parent_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.commit_parents_parent_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.commit_parents_parent_id_seq OWNER TO augur; - --- --- Name: commit_parents; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.commit_parents ( - cmt_id bigint NOT NULL, - parent_id bigint DEFAULT nextval('augur_data.commit_parents_parent_id_seq'::regclass) NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.commit_parents OWNER TO augur; - --- --- Name: commits_cmt_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.commits_cmt_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.commits_cmt_id_seq OWNER TO augur; - --- --- Name: commits; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.commits ( - cmt_id bigint DEFAULT nextval('augur_data.commits_cmt_id_seq'::regclass) NOT NULL, - repo_id bigint NOT NULL, - cmt_commit_hash character varying(80) NOT NULL, - cmt_author_name character varying NOT NULL, - cmt_author_raw_email character varying NOT NULL, - cmt_author_email character varying NOT NULL, - cmt_author_date character varying(10) NOT NULL, - cmt_author_affiliation character varying DEFAULT 'NULL'::character varying, - cmt_committer_name character varying NOT NULL, - cmt_committer_raw_email character varying NOT NULL, - cmt_committer_email character varying NOT NULL, - cmt_committer_date character varying NOT NULL, - cmt_committer_affiliation character varying DEFAULT 'NULL'::character varying, - cmt_added integer NOT NULL, - cmt_removed integer NOT NULL, - cmt_whitespace integer NOT NULL, - cmt_filename character varying NOT NULL, - cmt_date_attempted timestamp(0) without time zone NOT NULL, - cmt_ght_committer_id integer, - cmt_ght_committed_at timestamp(0) without time zone, - cmt_committer_timestamp timestamp(0) with time zone, - cmt_author_timestamp timestamp(0) with time zone, - cmt_author_platform_username character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cmt_ght_author_id uuid -); - - -ALTER TABLE augur_data.commits OWNER TO augur; - --- --- Name: TABLE commits; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.commits IS 'Commits. -Each row represents changes to one FILE within a single commit. So you will encounter multiple rows per commit hash in many cases. 
'; - - --- --- Name: contributor_affiliations_ca_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributor_affiliations_ca_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributor_affiliations_ca_id_seq OWNER TO augur; - --- --- Name: contributor_affiliations; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.contributor_affiliations ( - ca_id bigint DEFAULT nextval('augur_data.contributor_affiliations_ca_id_seq'::regclass) NOT NULL, - ca_domain character varying(64) NOT NULL, - ca_start_date date DEFAULT '1970-01-01'::date, - ca_last_used timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - ca_affiliation character varying, - ca_active smallint DEFAULT 1, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.contributor_affiliations OWNER TO augur; - --- --- Name: TABLE contributor_affiliations; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.contributor_affiliations IS 'This table exists outside of relations with other tables. The purpose is to provide a dynamic, owner maintained (and augur augmented) list of affiliations. This table is processed in affiliation information in the DM_ tables generated when Augur is finished counting commits using the Facade Worker. '; - - --- --- Name: contributor_repo_cntrb_repo_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributor_repo_cntrb_repo_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributor_repo_cntrb_repo_id_seq OWNER TO augur; - --- --- Name: contributor_repo; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.contributor_repo ( - cntrb_repo_id bigint DEFAULT nextval('augur_data.contributor_repo_cntrb_repo_id_seq'::regclass) NOT NULL, - repo_git character varying NOT NULL, - repo_name character varying NOT NULL, - gh_repo_id bigint NOT NULL, - cntrb_category character varying, - event_id bigint, - created_at timestamp(0) without time zone, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid NOT NULL -); - - -ALTER TABLE augur_data.contributor_repo OWNER TO augur; - --- --- Name: TABLE contributor_repo; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.contributor_repo IS 'Developed in Partnership with Andrew Brain. 
-From: [ - { - "login": "octocat", - "id": 1, - "node_id": "MDQ6VXNlcjE=", - "avatar_url": "https://github.com/images/error/octocat_happy.gif", - "gravatar_id": "", - "url": "https://api.github.com/users/octocat", - "html_url": "https://github.com/octocat", - "followers_url": "https://api.github.com/users/octocat/followers", - "following_url": "https://api.github.com/users/octocat/following{/other_user}", - "gists_url": "https://api.github.com/users/octocat/gists{/gist_id}", - "starred_url": "https://api.github.com/users/octocat/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/octocat/subscriptions", - "organizations_url": "https://api.github.com/users/octocat/orgs", - "repos_url": "https://api.github.com/users/octocat/repos", - "events_url": "https://api.github.com/users/octocat/events{/privacy}", - "received_events_url": "https://api.github.com/users/octocat/received_events", - "type": "User", - "site_admin": false - } -] -'; - - --- --- Name: COLUMN contributor_repo.repo_git; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributor_repo.repo_git IS 'Similar to cntrb_id, we need this data for the table to have meaningful data. '; - - --- --- Name: COLUMN contributor_repo.cntrb_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributor_repo.cntrb_id IS 'This is not null because what is the point without the contributor in this table? '; - - --- --- Name: contributors; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.contributors ( - cntrb_login character varying, - cntrb_email character varying, - cntrb_full_name character varying, - cntrb_company character varying, - cntrb_created_at timestamp(0) without time zone, - cntrb_type character varying, - cntrb_fake smallint DEFAULT 0, - cntrb_deleted smallint DEFAULT 0, - cntrb_long numeric(11,8) DEFAULT NULL::numeric, - cntrb_lat numeric(10,8) DEFAULT NULL::numeric, - cntrb_country_code character(3) DEFAULT NULL::bpchar, - cntrb_state character varying, - cntrb_city character varying, - cntrb_location character varying, - cntrb_canonical character varying, - cntrb_last_used timestamp(0) with time zone DEFAULT NULL::timestamp with time zone, - gh_user_id bigint, - gh_login character varying, - gh_url character varying, - gh_html_url character varying, - gh_node_id character varying, - gh_avatar_url character varying, - gh_gravatar_id character varying, - gh_followers_url character varying, - gh_following_url character varying, - gh_gists_url character varying, - gh_starred_url character varying, - gh_subscriptions_url character varying, - gh_organizations_url character varying, - gh_repos_url character varying, - gh_events_url character varying, - gh_received_events_url character varying, - gh_type character varying, - gh_site_admin character varying, - gl_web_url character varying, - gl_avatar_url character varying, - gl_state character varying, - gl_username character varying, - gl_full_name character varying, - gl_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid NOT NULL -); - - -ALTER TABLE augur_data.contributors OWNER TO augur; - --- --- Name: TABLE contributors; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.contributors IS 'For GitHub, this should be repeated from gh_login. 
for other systems, it should be that systems login. -Github now allows a user to change their login name, but their user id remains the same in this case. So, the natural key is the combination of id and login, but there should never be repeated logins. '; - - --- --- Name: COLUMN contributors.cntrb_login; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.cntrb_login IS 'Will be a double population with the same value as gh_login for github, but the local value for other systems. '; - - --- --- Name: COLUMN contributors.cntrb_email; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.cntrb_email IS 'This needs to be here for matching contributor ids, which are augur, to the commit information. '; - - --- --- Name: COLUMN contributors.cntrb_type; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.cntrb_type IS 'Present in another models. It is not currently used in Augur. '; - - --- --- Name: COLUMN contributors.gh_login; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gh_login IS 'populated with the github user name for github originated data. '; - - --- --- Name: COLUMN contributors.gl_web_url; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_web_url IS '“web_url” value from these API calls to GitLab, all for the same user - -https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: COLUMN contributors.gl_avatar_url; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_avatar_url IS '“avatar_url” value from these API calls to GitLab, all for the same user - -https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: COLUMN contributors.gl_state; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_state IS '“state” value from these API calls to GitLab, all for the same user - -https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: COLUMN contributors.gl_username; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_username IS '“username” value from these API calls to GitLab, all for the same user - 
-https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: COLUMN contributors.gl_full_name; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_full_name IS '“name” value from these API calls to GitLab, all for the same user - -https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: COLUMN contributors.gl_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.contributors.gl_id IS '"id" value from these API calls to GitLab, all for the same user - -https://gitlab.com/api/v4/users?username=computationalmystic -https://gitlab.com/api/v4/users?search=s@goggins.com -https://gitlab.com/api/v4/users?search=outdoors@acm.org - -[ - { - "id": 5481034, - "name": "sean goggins", - "username": "computationalmystic", - "state": "active", - "avatar_url": "https://secure.gravatar.com/avatar/fb1fb43953a6059df2fe8d94b21d575c?s=80&d=identicon", - "web_url": "https://gitlab.com/computationalmystic" - } -]'; - - --- --- Name: contributors_aliases_cntrb_alias_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributors_aliases_cntrb_alias_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributors_aliases_cntrb_alias_id_seq OWNER TO augur; - --- --- Name: contributors_aliases; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.contributors_aliases ( - cntrb_alias_id bigint DEFAULT nextval('augur_data.contributors_aliases_cntrb_alias_id_seq'::regclass) NOT NULL, - canonical_email character varying NOT NULL, - alias_email character varying NOT NULL, - cntrb_active smallint DEFAULT 1 NOT NULL, - cntrb_last_modified timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid NOT NULL -); - - -ALTER TABLE augur_data.contributors_aliases OWNER TO augur; - --- --- Name: TABLE contributors_aliases; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.contributors_aliases IS 'Every open source user may have more than one email used to make contributions over time. Augur selects the first email it encounters for a user as its “canonical_email”. - -The canonical_email is also added to the contributors_aliases table, with the canonical_email and alias_email being identical. 
Using this strategy, an email search will only need to join the alias table for basic email information, and can then more easily map the canonical email from each alias row to the same, more detailed information in the contributors table for a user. '; - - --- --- Name: contributors_aliases_cntrb_a_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributors_aliases_cntrb_a_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributors_aliases_cntrb_a_id_seq OWNER TO augur; - --- --- Name: contributors_cntrb_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributors_cntrb_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributors_cntrb_id_seq OWNER TO augur; - --- --- Name: contributors_history_cntrb_history_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.contributors_history_cntrb_history_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.contributors_history_cntrb_history_id_seq OWNER TO augur; - --- --- Name: discourse_insights_msg_discourse_id_seq1; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.discourse_insights_msg_discourse_id_seq1 - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.discourse_insights_msg_discourse_id_seq1 OWNER TO augur; - --- --- Name: discourse_insights; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.discourse_insights ( - msg_discourse_id bigint DEFAULT nextval('augur_data.discourse_insights_msg_discourse_id_seq1'::regclass) NOT NULL, - msg_id bigint, - discourse_act character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.discourse_insights OWNER TO augur; - --- --- Name: TABLE discourse_insights; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.discourse_insights IS 'This table is populated by the “Discourse_Analysis_Worker”. It examines sequential discourse, using computational linguistic methods, to draw statistical inferences regarding the discourse in a particular comment thread. 
'; - - --- --- Name: discourse_insights_msg_discourse_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.discourse_insights_msg_discourse_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.discourse_insights_msg_discourse_id_seq OWNER TO augur; - --- --- Name: dm_repo_annual; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_annual ( - repo_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_annual OWNER TO augur; - --- --- Name: dm_repo_group_annual; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_group_annual ( - repo_group_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_group_annual OWNER TO augur; - --- --- Name: dm_repo_group_monthly; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_group_monthly ( - repo_group_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - month smallint NOT NULL, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_group_monthly OWNER TO augur; - --- --- Name: dm_repo_group_weekly; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_group_weekly ( - repo_group_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - week smallint NOT NULL, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_group_weekly OWNER TO augur; - --- --- Name: dm_repo_monthly; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_monthly ( - repo_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - month smallint NOT NULL, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - 
tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_monthly OWNER TO augur; - --- --- Name: dm_repo_weekly; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.dm_repo_weekly ( - repo_id bigint NOT NULL, - email character varying NOT NULL, - affiliation character varying DEFAULT 'NULL'::character varying, - week smallint NOT NULL, - year smallint NOT NULL, - added bigint NOT NULL, - removed bigint NOT NULL, - whitespace bigint NOT NULL, - files bigint NOT NULL, - patches bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.dm_repo_weekly OWNER TO augur; - --- --- Name: exclude; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.exclude ( - id integer NOT NULL, - projects_id integer NOT NULL, - email character varying DEFAULT 'NULL'::character varying, - domain character varying DEFAULT 'NULL'::character varying -); - - -ALTER TABLE augur_data.exclude OWNER TO augur; - --- --- Name: issue_assignees_issue_assignee_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.issue_assignees_issue_assignee_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.issue_assignees_issue_assignee_id_seq OWNER TO augur; - --- --- Name: issue_assignees; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.issue_assignees ( - issue_assignee_id bigint DEFAULT nextval('augur_data.issue_assignees_issue_assignee_id_seq'::regclass) NOT NULL, - issue_id bigint, - repo_id bigint, - issue_assignee_src_id bigint, - issue_assignee_src_node character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.issue_assignees OWNER TO augur; - --- --- Name: COLUMN issue_assignees.issue_assignee_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_assignees.issue_assignee_src_id IS 'This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue events API in the issue_assignees embedded JSON object. We may discover it is an ID for the person themselves; but my hypothesis is that its not.'; - - --- --- Name: COLUMN issue_assignees.issue_assignee_src_node; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_assignees.issue_assignee_src_node IS 'This character based identifier comes from the source. In the case of GitHub, it is the id that is the second field returned from the issue events API in the issue_assignees embedded JSON object. 
We may discover it is an ID for the person themselves; but my hypothesis is that its not.'; - - --- --- Name: issue_events_event_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.issue_events_event_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.issue_events_event_id_seq OWNER TO augur; - --- --- Name: issue_events; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.issue_events ( - event_id bigint DEFAULT nextval('augur_data.issue_events_event_id_seq'::regclass) NOT NULL, - issue_id bigint NOT NULL, - repo_id bigint, - action character varying NOT NULL, - action_commit_hash character varying, - created_at timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - node_id character varying, - node_url character varying, - platform_id bigint NOT NULL, - issue_event_src_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.issue_events OWNER TO augur; - --- --- Name: COLUMN issue_events.node_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_events.node_id IS 'This should be renamed to issue_event_src_node_id, as its the varchar identifier in GitHub and likely common in other sources as well. However, since it was created before we came to this naming standard and workers are built around it, we have it simply named as node_id. Anywhere you see node_id in the schema, it comes from GitHubs terminology.'; - - --- --- Name: COLUMN issue_events.issue_event_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_events.issue_event_src_id IS 'This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue events API'; - - --- --- Name: issue_labels_issue_label_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.issue_labels_issue_label_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.issue_labels_issue_label_id_seq OWNER TO augur; - --- --- Name: issue_labels; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.issue_labels ( - issue_label_id bigint DEFAULT nextval('augur_data.issue_labels_issue_label_id_seq'::regclass) NOT NULL, - issue_id bigint, - repo_id bigint, - label_text character varying, - label_description character varying, - label_color character varying, - label_src_id bigint, - label_src_node_id character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.issue_labels OWNER TO augur; - --- --- Name: COLUMN issue_labels.label_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_labels.label_src_id IS 'This character based identifier (node) comes from the source. 
In the case of GitHub, it is the id that is the second field returned from the issue events API JSON subsection for issues.'; - - --- --- Name: issue_message_ref_issue_msg_ref_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.issue_message_ref_issue_msg_ref_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.issue_message_ref_issue_msg_ref_id_seq OWNER TO augur; - --- --- Name: issue_message_ref; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.issue_message_ref ( - issue_msg_ref_id bigint DEFAULT nextval('augur_data.issue_message_ref_issue_msg_ref_id_seq'::regclass) NOT NULL, - issue_id bigint, - repo_id bigint, - msg_id bigint, - issue_msg_ref_src_node_id character varying, - issue_msg_ref_src_comment_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.issue_message_ref OWNER TO augur; - --- --- Name: COLUMN issue_message_ref.issue_msg_ref_src_node_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_message_ref.issue_msg_ref_src_node_id IS 'This character based identifier comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue comments API'; - - --- --- Name: COLUMN issue_message_ref.issue_msg_ref_src_comment_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issue_message_ref.issue_msg_ref_src_comment_id IS 'This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue comments API'; - - --- --- Name: issue_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.issue_seq - START WITH 31000 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.issue_seq OWNER TO augur; - --- --- Name: issues; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.issues ( - issue_id bigint DEFAULT nextval('augur_data.issue_seq'::regclass) NOT NULL, - repo_id bigint, - pull_request bigint, - pull_request_id bigint, - created_at timestamp(0) without time zone, - issue_title character varying, - issue_body character varying, - comment_count bigint, - updated_at timestamp(0) without time zone, - closed_at timestamp(0) without time zone, - due_on timestamp(0) without time zone, - repository_url character varying, - issue_url character varying, - labels_url character varying, - comments_url character varying, - events_url character varying, - html_url character varying, - issue_state character varying, - issue_node_id character varying, - gh_issue_number bigint, - gh_issue_id bigint, - gh_user_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - reporter_id uuid, - cntrb_id uuid -); - - -ALTER TABLE augur_data.issues OWNER TO augur; - --- --- Name: COLUMN issues.reporter_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issues.reporter_id IS 'The ID of the person who opened the issue. '; - - --- --- Name: COLUMN issues.cntrb_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.issues.cntrb_id IS 'The ID of the person who closed the issue. 
'; - - --- --- Name: libraries_library_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.libraries_library_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.libraries_library_id_seq OWNER TO augur; - --- --- Name: libraries; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.libraries ( - library_id bigint DEFAULT nextval('augur_data.libraries_library_id_seq'::regclass) NOT NULL, - repo_id bigint, - platform character varying, - name character varying, - created_timestamp timestamp(0) without time zone DEFAULT NULL::timestamp without time zone, - updated_timestamp timestamp(0) without time zone DEFAULT NULL::timestamp without time zone, - library_description character varying(2000) DEFAULT NULL::character varying, - keywords character varying, - library_homepage character varying(1000) DEFAULT NULL::character varying, - license character varying, - version_count integer, - latest_release_timestamp character varying, - latest_release_number character varying, - package_manager_id character varying, - dependency_count integer, - dependent_library_count integer, - primary_language character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.libraries OWNER TO augur; - --- --- Name: library_dependencies_lib_dependency_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.library_dependencies_lib_dependency_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.library_dependencies_lib_dependency_id_seq OWNER TO augur; - --- --- Name: library_dependencies; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.library_dependencies ( - lib_dependency_id bigint DEFAULT nextval('augur_data.library_dependencies_lib_dependency_id_seq'::regclass) NOT NULL, - library_id bigint, - manifest_platform character varying, - manifest_filepath character varying(1000) DEFAULT NULL::character varying, - manifest_kind character varying, - repo_id_branch character varying NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.library_dependencies OWNER TO augur; - --- --- Name: library_version_library_version_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.library_version_library_version_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.library_version_library_version_id_seq OWNER TO augur; - --- --- Name: library_version; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.library_version ( - library_version_id bigint DEFAULT nextval('augur_data.library_version_library_version_id_seq'::regclass) NOT NULL, - library_id bigint, - library_platform character varying, - version_number character varying, - version_release_date timestamp(0) without time zone DEFAULT NULL::timestamp without time zone, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.library_version OWNER TO augur; - --- --- Name: lstm_anomaly_models_model_id_seq; Type: 
SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.lstm_anomaly_models_model_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.lstm_anomaly_models_model_id_seq OWNER TO augur; - --- --- Name: lstm_anomaly_models; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.lstm_anomaly_models ( - model_id bigint DEFAULT nextval('augur_data.lstm_anomaly_models_model_id_seq'::regclass) NOT NULL, - model_name character varying, - model_description character varying, - look_back_days bigint, - training_days bigint, - batch_size bigint, - metric character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.lstm_anomaly_models OWNER TO augur; - --- --- Name: lstm_anomaly_results_result_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.lstm_anomaly_results_result_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.lstm_anomaly_results_result_id_seq OWNER TO augur; - --- --- Name: lstm_anomaly_results; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.lstm_anomaly_results ( - result_id bigint DEFAULT nextval('augur_data.lstm_anomaly_results_result_id_seq'::regclass) NOT NULL, - repo_id bigint, - repo_category character varying, - model_id bigint, - metric character varying, - contamination_factor double precision, - mean_absolute_error double precision, - remarks character varying, - metric_field character varying, - mean_absolute_actual_value double precision, - mean_absolute_prediction_value double precision, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.lstm_anomaly_results OWNER TO augur; - --- --- Name: COLUMN lstm_anomaly_results.metric_field; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.lstm_anomaly_results.metric_field IS 'This is a listing of all of the endpoint fields included in the generation of the metric. Sometimes there is one, sometimes there is more than one. This will list them all. 
'; - - --- --- Name: message_msg_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.message_msg_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.message_msg_id_seq OWNER TO augur; - --- --- Name: message; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.message ( - msg_id bigint DEFAULT nextval('augur_data.message_msg_id_seq'::regclass) NOT NULL, - rgls_id bigint, - platform_msg_id bigint, - platform_node_id character varying, - repo_id bigint, - msg_text character varying, - msg_timestamp timestamp(0) without time zone, - msg_sender_email character varying, - msg_header character varying, - pltfrm_id bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.message OWNER TO augur; - --- --- Name: COLUMN message.cntrb_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message.cntrb_id IS 'Not populated for mailing lists. Populated for GitHub issues. '; - - --- --- Name: message_analysis_msg_analysis_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.message_analysis_msg_analysis_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.message_analysis_msg_analysis_id_seq OWNER TO augur; - --- --- Name: message_analysis; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.message_analysis ( - msg_analysis_id bigint DEFAULT nextval('augur_data.message_analysis_msg_analysis_id_seq'::regclass) NOT NULL, - msg_id bigint, - worker_run_id bigint, - sentiment_score double precision, - reconstruction_error double precision, - novelty_flag boolean, - feedback_flag boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.message_analysis OWNER TO augur; - --- --- Name: COLUMN message_analysis.worker_run_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis.worker_run_id IS 'This column is used to indicate analyses run by a worker during the same execution period, and is useful for grouping, and time series analysis. '; - - --- --- Name: COLUMN message_analysis.sentiment_score; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis.sentiment_score IS 'A sentiment analysis score. Zero is neutral, negative numbers are negative sentiment, and positive numbers are positive sentiment. '; - - --- --- Name: COLUMN message_analysis.reconstruction_error; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis.reconstruction_error IS 'Each message is converted to a 250 dimensin doc2vec vector, so the reconstruction error is the difference between what the predicted vector and the actual vector.'; - - --- --- Name: COLUMN message_analysis.novelty_flag; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis.novelty_flag IS 'This is an analysis of the degree to which the message is novel when compared to other messages in a repository. 
For example when bots are producing numerous identical messages, the novelty score is low. It would also be a low novelty score when several people are making the same coment. '; - - --- --- Name: COLUMN message_analysis.feedback_flag; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis.feedback_flag IS 'This exists to provide the user with an opportunity provide feedback on the resulting the sentiment scores. '; - - --- --- Name: message_analysis_summary_msg_summary_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.message_analysis_summary_msg_summary_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.message_analysis_summary_msg_summary_id_seq OWNER TO augur; - --- --- Name: message_analysis_summary; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.message_analysis_summary ( - msg_summary_id bigint DEFAULT nextval('augur_data.message_analysis_summary_msg_summary_id_seq'::regclass) NOT NULL, - repo_id bigint, - worker_run_id bigint, - positive_ratio double precision, - negative_ratio double precision, - novel_count bigint, - period timestamp(0) without time zone, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.message_analysis_summary OWNER TO augur; - --- --- Name: TABLE message_analysis_summary; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.message_analysis_summary IS 'In a relationally perfect world, we would have a table called “message_analysis_run” the incremented the “worker_run_id” for both message_analysis and message_analysis_summary. For now, we decided this was overkill. '; - - --- --- Name: COLUMN message_analysis_summary.worker_run_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis_summary.worker_run_id IS 'This value should reflect the worker_run_id for the messages summarized in the table. There is not a relation between these two tables for that purpose because its not *really*, relationaly a concept unless we create a third table for "worker_run_id", which we determined was unnecessarily complex. '; - - --- --- Name: COLUMN message_analysis_summary.novel_count; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis_summary.novel_count IS 'The number of messages identified as novel during the analyzed period'; - - --- --- Name: COLUMN message_analysis_summary.period; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_analysis_summary.period IS 'The whole timeline is divided into periods based on the definition of time period for analysis, which is user specified. Timestamp of the first period to look at, until the end of messages at the data of execution. 
'; - - --- --- Name: message_sentiment_msg_analysis_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.message_sentiment_msg_analysis_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.message_sentiment_msg_analysis_id_seq OWNER TO augur; - --- --- Name: message_sentiment; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.message_sentiment ( - msg_analysis_id bigint DEFAULT nextval('augur_data.message_sentiment_msg_analysis_id_seq'::regclass) NOT NULL, - msg_id bigint, - worker_run_id bigint, - sentiment_score double precision, - reconstruction_error double precision, - novelty_flag boolean, - feedback_flag boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.message_sentiment OWNER TO augur; - --- --- Name: COLUMN message_sentiment.worker_run_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment.worker_run_id IS 'This column is used to indicate analyses run by a worker during the same execution period, and is useful for grouping, and time series analysis. '; - - --- --- Name: COLUMN message_sentiment.sentiment_score; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment.sentiment_score IS 'A sentiment analysis score. Zero is neutral, negative numbers are negative sentiment, and positive numbers are positive sentiment. '; - - --- --- Name: COLUMN message_sentiment.reconstruction_error; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment.reconstruction_error IS 'Each message is converted to a 250 dimensin doc2vec vector, so the reconstruction error is the difference between what the predicted vector and the actual vector.'; - - --- --- Name: COLUMN message_sentiment.novelty_flag; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment.novelty_flag IS 'This is an analysis of the degree to which the message is novel when compared to other messages in a repository. For example when bots are producing numerous identical messages, the novelty score is low. It would also be a low novelty score when several people are making the same coment. '; - - --- --- Name: COLUMN message_sentiment.feedback_flag; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment.feedback_flag IS 'This exists to provide the user with an opportunity provide feedback on the resulting the sentiment scores. 
'; - - --- --- Name: message_sentiment_summary_msg_summary_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.message_sentiment_summary_msg_summary_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.message_sentiment_summary_msg_summary_id_seq OWNER TO augur; - --- --- Name: message_sentiment_summary; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.message_sentiment_summary ( - msg_summary_id bigint DEFAULT nextval('augur_data.message_sentiment_summary_msg_summary_id_seq'::regclass) NOT NULL, - repo_id bigint, - worker_run_id bigint, - positive_ratio double precision, - negative_ratio double precision, - novel_count bigint, - period timestamp(0) without time zone, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.message_sentiment_summary OWNER TO augur; - --- --- Name: TABLE message_sentiment_summary; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.message_sentiment_summary IS 'In a relationally perfect world, we would have a table called “message_sentiment_run” the incremented the “worker_run_id” for both message_sentiment and message_sentiment_summary. For now, we decided this was overkill. '; - - --- --- Name: COLUMN message_sentiment_summary.worker_run_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment_summary.worker_run_id IS 'This value should reflect the worker_run_id for the messages summarized in the table. There is not a relation between these two tables for that purpose because its not *really*, relationaly a concept unless we create a third table for "worker_run_id", which we determined was unnecessarily complex. '; - - --- --- Name: COLUMN message_sentiment_summary.novel_count; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment_summary.novel_count IS 'The number of messages identified as novel during the analyzed period'; - - --- --- Name: COLUMN message_sentiment_summary.period; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.message_sentiment_summary.period IS 'The whole timeline is divided into periods based on the definition of time period for analysis, which is user specified. Timestamp of the first period to look at, until the end of messages at the data of execution. 
'; - - --- --- Name: platform_pltfrm_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.platform_pltfrm_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.platform_pltfrm_id_seq OWNER TO augur; - --- --- Name: platform; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.platform ( - pltfrm_id bigint DEFAULT nextval('augur_data.platform_pltfrm_id_seq'::regclass) NOT NULL, - pltfrm_name character varying, - pltfrm_version character varying, - pltfrm_release_date date, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.platform OWNER TO augur; - --- --- Name: pull_request_analysis_pull_request_analysis_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_analysis_pull_request_analysis_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_analysis_pull_request_analysis_id_seq OWNER TO augur; - --- --- Name: pull_request_analysis; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_analysis ( - pull_request_analysis_id bigint DEFAULT nextval('augur_data.pull_request_analysis_pull_request_analysis_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - merge_probability numeric(256,250), - mechanism character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_data.pull_request_analysis OWNER TO augur; - --- --- Name: COLUMN pull_request_analysis.pull_request_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_analysis.pull_request_id IS 'It would be better if the pull request worker is run first to fetch the latest PRs before analyzing'; - - --- --- Name: COLUMN pull_request_analysis.merge_probability; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_analysis.merge_probability IS 'Indicates the probability of the PR being merged'; - - --- --- Name: COLUMN pull_request_analysis.mechanism; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_analysis.mechanism IS 'the ML model used for prediction (It is XGBoost Classifier at present)'; - - --- --- Name: pull_request_assignees_pr_assignee_map_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_assignees_pr_assignee_map_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_assignees_pr_assignee_map_id_seq OWNER TO augur; - --- --- Name: pull_request_assignees; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_assignees ( - pr_assignee_map_id bigint DEFAULT nextval('augur_data.pull_request_assignees_pr_assignee_map_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - pr_assignee_src_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - contrib_id uuid -); - - -ALTER TABLE augur_data.pull_request_assignees OWNER TO augur; 
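Editor's note: the pull_request_analysis columns defined above (merge_probability, mechanism) are easiest to understand with a concrete read query. The sketch below is illustrative only and is not part of the dumped schema or of this patch; it assumes the pull_requests table defined later in this file and assumes GitHub-style state strings such as 'open' in pr_src_state.

-- Illustrative query: open pull requests ranked by the predicted merge probability
-- stored in pull_request_analysis (the model used is recorded in the "mechanism" column).
SELECT pr.pull_request_id,
       pr.pr_src_number,
       pra.merge_probability,
       pra.mechanism
FROM augur_data.pull_requests pr
JOIN augur_data.pull_request_analysis pra
  ON pra.pull_request_id = pr.pull_request_id
WHERE pr.pr_src_state = 'open'      -- assumed GitHub-style state value
ORDER BY pra.merge_probability DESC
LIMIT 20;
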
- --- --- Name: pull_request_commits_pr_cmt_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_commits_pr_cmt_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_commits_pr_cmt_id_seq OWNER TO augur; - --- --- Name: pull_request_commits; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_commits ( - pr_cmt_id bigint DEFAULT nextval('augur_data.pull_request_commits_pr_cmt_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - pr_cmt_sha character varying, - pr_cmt_node_id character varying, - pr_cmt_message character varying, - pr_cmt_comments_url character varying, - pr_cmt_timestamp timestamp(0) without time zone, - pr_cmt_author_email character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - pr_cmt_author_cntrb_id uuid -); - - -ALTER TABLE augur_data.pull_request_commits OWNER TO augur; - --- --- Name: TABLE pull_request_commits; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.pull_request_commits IS 'Pull request commits are an enumeration of each commit associated with a pull request. -Not all pull requests are from a branch or fork into master. -The commits table intends to count only commits that end up in the master branch (i.e., part of the deployed code base for a project). -Therefore, there will be commit “SHA”’s in this table that are no associated with a commit SHA in the commits table. -In cases where the PR is to the master branch of a project, you will find a match. In cases where the PR does not involve the master branch, you will not find a corresponding commit SHA in the commits table. This is expected. '; - - --- --- Name: COLUMN pull_request_commits.pr_cmt_sha; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_commits.pr_cmt_sha IS 'This is the commit SHA for a pull request commit. If the PR is not to the master branch of the main repository (or, in rare cases, from it), then you will NOT find a corresponding commit SHA in the commit table. (see table comment for further explanation). 
'; - - --- --- Name: pull_request_events_pr_event_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_events_pr_event_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_events_pr_event_id_seq OWNER TO augur; - --- --- Name: pull_request_events; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_events ( - pr_event_id bigint DEFAULT nextval('augur_data.pull_request_events_pr_event_id_seq'::regclass) NOT NULL, - pull_request_id bigint NOT NULL, - repo_id bigint, - action character varying NOT NULL, - action_commit_hash character varying, - created_at timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - issue_event_src_id bigint, - node_id character varying, - node_url character varying, - platform_id bigint DEFAULT 25150 NOT NULL, - pr_platform_event_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.pull_request_events OWNER TO augur; - --- --- Name: COLUMN pull_request_events.issue_event_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_events.issue_event_src_id IS 'This ID comes from the source. In the case of GitHub, it is the id that is the first field returned from the issue events API'; - - --- --- Name: COLUMN pull_request_events.node_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_events.node_id IS 'This should be renamed to issue_event_src_node_id, as its the varchar identifier in GitHub and likely common in other sources as well. However, since it was created before we came to this naming standard and workers are built around it, we have it simply named as node_id. Anywhere you see node_id in the schema, it comes from GitHubs terminology.'; - - --- --- Name: pull_request_files_pr_file_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_files_pr_file_id_seq - START WITH 25150 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_files_pr_file_id_seq OWNER TO augur; - --- --- Name: pull_request_files; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_files ( - pr_file_id bigint DEFAULT nextval('augur_data.pull_request_files_pr_file_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - pr_file_additions bigint, - pr_file_deletions bigint, - pr_file_path character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.pull_request_files OWNER TO augur; - --- --- Name: TABLE pull_request_files; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.pull_request_files IS 'Pull request commits are an enumeration of each commit associated with a pull request. -Not all pull requests are from a branch or fork into master. -The commits table intends to count only commits that end up in the master branch (i.e., part of the deployed code base for a project). -Therefore, there will be commit “SHA”’s in this table that are no associated with a commit SHA in the commits table. 
-In cases where the PR is to the master branch of a project, you will find a match. In cases where the PR does not involve the master branch, you will not find a corresponding commit SHA in the commits table. This is expected. '; - - --- --- Name: pull_request_labels_pr_label_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_labels_pr_label_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_labels_pr_label_id_seq OWNER TO augur; - --- --- Name: pull_request_labels; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_labels ( - pr_label_id bigint DEFAULT nextval('augur_data.pull_request_labels_pr_label_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - pr_src_id bigint, - pr_src_node_id character varying, - pr_src_url character varying, - pr_src_description character varying, - pr_src_color character varying, - pr_src_default_bool boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.pull_request_labels OWNER TO augur; - --- --- Name: pull_request_message_ref_pr_msg_ref_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_message_ref_pr_msg_ref_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_message_ref_pr_msg_ref_id_seq OWNER TO augur; - --- --- Name: pull_request_message_ref; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_message_ref ( - pr_msg_ref_id bigint DEFAULT nextval('augur_data.pull_request_message_ref_pr_msg_ref_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - msg_id bigint, - pr_message_ref_src_comment_id bigint, - pr_message_ref_src_node_id character varying, - pr_issue_url character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.pull_request_message_ref OWNER TO augur; - --- --- Name: pull_request_meta_pr_repo_meta_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_meta_pr_repo_meta_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_meta_pr_repo_meta_id_seq OWNER TO augur; - --- --- Name: pull_request_meta; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_meta ( - pr_repo_meta_id bigint DEFAULT nextval('augur_data.pull_request_meta_pr_repo_meta_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - repo_id bigint, - pr_head_or_base character varying, - pr_src_meta_label character varying, - pr_src_meta_ref character varying, - pr_sha character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.pull_request_meta OWNER TO augur; - --- --- Name: TABLE pull_request_meta; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.pull_request_meta IS 'Pull requests contain referencing metadata. 
There are a few columns that are discrete. There are also head and base designations for the repo on each side of the pull request. Similar functions exist in GitLab, though the language here is based on GitHub. The JSON Being adapted to as of the development of this schema is here: "base": { "label": "chaoss:dev", "ref": "dev", "sha": "dc6c6f3947f7dc84ecba3d8bda641ef786e7027d", "user": { "login": "chaoss", "id": 29740296, "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", "gravatar_id": "", "url": "https://api.github.com/users/chaoss", "html_url": "https://github.com/chaoss", "followers_url": "https://api.github.com/users/chaoss/followers", "following_url": "https://api.github.com/users/chaoss/following{/other_user}", "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", "organizations_url": "https://api.github.com/users/chaoss/orgs", "repos_url": "https://api.github.com/users/chaoss/repos", "events_url": "https://api.github.com/users/chaoss/events{/privacy}", "received_events_url": "https://api.github.com/users/chaoss/received_events", "type": "Organization", "site_admin": false }, "repo": { "id": 78134122, "node_id": "MDEwOlJlcG9zaXRvcnk3ODEzNDEyMg==", "name": "augur", "full_name": "chaoss/augur", "private": false, "owner": { "login": "chaoss", "id": 29740296, "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", "gravatar_id": "", "url": "https://api.github.com/users/chaoss", "html_url": "https://github.com/chaoss", "followers_url": "https://api.github.com/users/chaoss/followers", "following_url": "https://api.github.com/users/chaoss/following{/other_user}", "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", "organizations_url": "https://api.github.com/users/chaoss/orgs", "repos_url": "https://api.github.com/users/chaoss/repos", "events_url": "https://api.github.com/users/chaoss/events{/privacy}", "received_events_url": "https://api.github.com/users/chaoss/received_events", "type": "Organization", "site_admin": false }, '; - - --- --- Name: COLUMN pull_request_meta.pr_head_or_base; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_meta.pr_head_or_base IS 'Each pull request should have one and only one head record; and one and only one base record. '; - - --- --- Name: COLUMN pull_request_meta.pr_src_meta_label; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_meta.pr_src_meta_label IS 'This is a representation of the repo:branch information in the pull request. Head is issueing the pull request and base is taking the pull request. 
For example: (We do not store all of this) - - "head": { - "label": "chaoss:pull-request-worker", - "ref": "pull-request-worker", - "sha": "6b380c3d6d625616f79d702612ebab6d204614f2", - "user": { - "login": "chaoss", - "id": 29740296, - "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", - "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", - "gravatar_id": "", - "url": "https://api.github.com/users/chaoss", - "html_url": "https://github.com/chaoss", - "followers_url": "https://api.github.com/users/chaoss/followers", - "following_url": "https://api.github.com/users/chaoss/following{/other_user}", - "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", - "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", - "organizations_url": "https://api.github.com/users/chaoss/orgs", - "repos_url": "https://api.github.com/users/chaoss/repos", - "events_url": "https://api.github.com/users/chaoss/events{/privacy}", - "received_events_url": "https://api.github.com/users/chaoss/received_events", - "type": "Organization", - "site_admin": false - }, - "repo": { - "id": 78134122, - "node_id": "MDEwOlJlcG9zaXRvcnk3ODEzNDEyMg==", - "name": "augur", - "full_name": "chaoss/augur", - "private": false, - "owner": { - "login": "chaoss", - "id": 29740296, - "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", - "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", - "gravatar_id": "", - "url": "https://api.github.com/users/chaoss", - "html_url": "https://github.com/chaoss", - "followers_url": "https://api.github.com/users/chaoss/followers", - "following_url": "https://api.github.com/users/chaoss/following{/other_user}", - "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", - "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", - "organizations_url": "https://api.github.com/users/chaoss/orgs", - "repos_url": "https://api.github.com/users/chaoss/repos", - "events_url": "https://api.github.com/users/chaoss/events{/privacy}", - "received_events_url": "https://api.github.com/users/chaoss/received_events", - "type": "Organization", - "site_admin": false - }, - "html_url": "https://github.com/chaoss/augur", - "description": "Python library and web service for Open Source Software Health and Sustainability metrics & data collection.", - "fork": false, - "url": "https://api.github.com/repos/chaoss/augur", - "forks_url": "https://api.github.com/repos/chaoss/augur/forks", - "keys_url": "https://api.github.com/repos/chaoss/augur/keys{/key_id}", - "collaborators_url": "https://api.github.com/repos/chaoss/augur/collaborators{/collaborator}", - "teams_url": "https://api.github.com/repos/chaoss/augur/teams", - "hooks_url": "https://api.github.com/repos/chaoss/augur/hooks", - "issue_events_url": "https://api.github.com/repos/chaoss/augur/issues/events{/number}", - "events_url": "https://api.github.com/repos/chaoss/augur/events", - "assignees_url": "https://api.github.com/repos/chaoss/augur/assignees{/user}", - "branches_url": "https://api.github.com/repos/chaoss/augur/branches{/branch}", - "tags_url": "https://api.github.com/repos/chaoss/augur/tags", - "blobs_url": "https://api.github.com/repos/chaoss/augur/git/blobs{/sha}", - "git_tags_url": "https://api.github.com/repos/chaoss/augur/git/tags{/sha}", - "git_refs_url": "https://api.github.com/repos/chaoss/augur/git/refs{/sha}", - 
"trees_url": "https://api.github.com/repos/chaoss/augur/git/trees{/sha}", - "statuses_url": "https://api.github.com/repos/chaoss/augur/statuses/{sha}", - "languages_url": "https://api.github.com/repos/chaoss/augur/languages", - "stargazers_url": "https://api.github.com/repos/chaoss/augur/stargazers", - "contributors_url": "https://api.github.com/repos/chaoss/augur/contributors", - "subscribers_url": "https://api.github.com/repos/chaoss/augur/subscribers", - "subscription_url": "https://api.github.com/repos/chaoss/augur/subscription", - "commits_url": "https://api.github.com/repos/chaoss/augur/commits{/sha}", - "git_commits_url": "https://api.github.com/repos/chaoss/augur/git/commits{/sha}", - "comments_url": "https://api.github.com/repos/chaoss/augur/comments{/number}", - "issue_comment_url": "https://api.github.com/repos/chaoss/augur/issues/comments{/number}", - "contents_url": "https://api.github.com/repos/chaoss/augur/contents/{+path}", - "compare_url": "https://api.github.com/repos/chaoss/augur/compare/{base}...{head}", - "merges_url": "https://api.github.com/repos/chaoss/augur/merges", - "archive_url": "https://api.github.com/repos/chaoss/augur/{archive_format}{/ref}", - "downloads_url": "https://api.github.com/repos/chaoss/augur/downloads", - "issues_url": "https://api.github.com/repos/chaoss/augur/issues{/number}", - "pulls_url": "https://api.github.com/repos/chaoss/augur/pulls{/number}", - "milestones_url": "https://api.github.com/repos/chaoss/augur/milestones{/number}", - "notifications_url": "https://api.github.com/repos/chaoss/augur/notifications{?since,all,participating}", - "labels_url": "https://api.github.com/repos/chaoss/augur/labels{/name}", - "releases_url": "https://api.github.com/repos/chaoss/augur/releases{/id}", - "deployments_url": "https://api.github.com/repos/chaoss/augur/deployments", - "created_at": "2017-01-05T17:34:54Z", - "updated_at": "2019-11-15T00:56:12Z", - "pushed_at": "2019-12-02T06:27:26Z", - "git_url": "git://github.com/chaoss/augur.git", - "ssh_url": "git@github.com:chaoss/augur.git", - "clone_url": "https://github.com/chaoss/augur.git", - "svn_url": "https://github.com/chaoss/augur", - "homepage": "http://augur.osshealth.io/", - "size": 82004, - "stargazers_count": 153, - "watchers_count": 153, - "language": "Python", - "has_issues": true, - "has_projects": false, - "has_downloads": true, - "has_wiki": false, - "has_pages": true, - "forks_count": 205, - "mirror_url": null, - "archived": false, - "disabled": false, - "open_issues_count": 14, - "license": { - "key": "mit", - "name": "MIT License", - "spdx_id": "MIT", - "url": "https://api.github.com/licenses/mit", - "node_id": "MDc6TGljZW5zZTEz" - }, - "forks": 205, - "open_issues": 14, - "watchers": 153, - "default_branch": "master" - } - }, - "base": { - "label": "chaoss:dev", - "ref": "dev", - "sha": "bfd2d34b51659613dd842cf83c3873f7699c2a0e", - "user": { - "login": "chaoss", - "id": 29740296, - "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", - "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", - "gravatar_id": "", - "url": "https://api.github.com/users/chaoss", - "html_url": "https://github.com/chaoss", - "followers_url": "https://api.github.com/users/chaoss/followers", - "following_url": "https://api.github.com/users/chaoss/following{/other_user}", - "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", - "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", - 
"organizations_url": "https://api.github.com/users/chaoss/orgs", - "repos_url": "https://api.github.com/users/chaoss/repos", - "events_url": "https://api.github.com/users/chaoss/events{/privacy}", - "received_events_url": "https://api.github.com/users/chaoss/received_events", - "type": "Organization", - "site_admin": false - }, - "repo": { - "id": 78134122, - "node_id": "MDEwOlJlcG9zaXRvcnk3ODEzNDEyMg==", - "name": "augur", - "full_name": "chaoss/augur", - "private": false, - "owner": { - "login": "chaoss", - "id": 29740296, - "node_id": "MDEyOk9yZ2FuaXphdGlvbjI5NzQwMjk2", - "avatar_url": "https://avatars2.githubusercontent.com/u/29740296?v=4", - "gravatar_id": "", - "url": "https://api.github.com/users/chaoss", - "html_url": "https://github.com/chaoss", - "followers_url": "https://api.github.com/users/chaoss/followers", - "following_url": "https://api.github.com/users/chaoss/following{/other_user}", - "gists_url": "https://api.github.com/users/chaoss/gists{/gist_id}", - "starred_url": "https://api.github.com/users/chaoss/starred{/owner}{/repo}", - "subscriptions_url": "https://api.github.com/users/chaoss/subscriptions", - "organizations_url": "https://api.github.com/users/chaoss/orgs", - "repos_url": "https://api.github.com/users/chaoss/repos", - "events_url": "https://api.github.com/users/chaoss/events{/privacy}", - "received_events_url": "https://api.github.com/users/chaoss/received_events", - "type": "Organization", - "site_admin": false - }, -'; - - --- --- Name: pull_request_repo_pr_repo_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_repo_pr_repo_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_repo_pr_repo_id_seq OWNER TO augur; - --- --- Name: pull_request_repo; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_repo ( - pr_repo_id bigint DEFAULT nextval('augur_data.pull_request_repo_pr_repo_id_seq'::regclass) NOT NULL, - pr_repo_meta_id bigint, - pr_repo_head_or_base character varying, - pr_src_repo_id bigint, - pr_src_node_id character varying, - pr_repo_name character varying, - pr_repo_full_name character varying, - pr_repo_private_bool boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - pr_cntrb_id uuid -); - - -ALTER TABLE augur_data.pull_request_repo OWNER TO augur; - --- --- Name: TABLE pull_request_repo; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.pull_request_repo IS 'This table is for storing information about forks that exist as part of a pull request. Generally we do not want to track these like ordinary repositories. '; - - --- --- Name: COLUMN pull_request_repo.pr_repo_head_or_base; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_repo.pr_repo_head_or_base IS 'For ease of validation checking, we should determine if the repository referenced is the head or base of the pull request. 
Each pull request should have one and only one of these, which is not enforcable easily in the database.'; - - --- --- Name: pull_request_review_message_ref_pr_review_msg_ref_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_review_message_ref_pr_review_msg_ref_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_review_message_ref_pr_review_msg_ref_id_seq OWNER TO augur; - --- --- Name: pull_request_review_message_ref; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_review_message_ref ( - pr_review_msg_ref_id bigint DEFAULT nextval('augur_data.pull_request_review_message_ref_pr_review_msg_ref_id_seq'::regclass) NOT NULL, - pr_review_id bigint NOT NULL, - repo_id bigint, - msg_id bigint NOT NULL, - pr_review_msg_url character varying, - pr_review_src_id bigint, - pr_review_msg_src_id bigint, - pr_review_msg_node_id character varying, - pr_review_msg_diff_hunk character varying, - pr_review_msg_path character varying, - pr_review_msg_position bigint, - pr_review_msg_original_position bigint, - pr_review_msg_commit_id character varying, - pr_review_msg_original_commit_id character varying, - pr_review_msg_updated_at timestamp(6) without time zone, - pr_review_msg_html_url character varying, - pr_url character varying, - pr_review_msg_author_association character varying, - pr_review_msg_start_line bigint, - pr_review_msg_original_start_line bigint, - pr_review_msg_start_side character varying, - pr_review_msg_line bigint, - pr_review_msg_original_line bigint, - pr_review_msg_side character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.pull_request_review_message_ref OWNER TO augur; - --- --- Name: pull_request_reviewers_pr_reviewer_map_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_reviewers_pr_reviewer_map_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_reviewers_pr_reviewer_map_id_seq OWNER TO augur; - --- --- Name: pull_request_reviewers; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_reviewers ( - pr_reviewer_map_id bigint DEFAULT nextval('augur_data.pull_request_reviewers_pr_reviewer_map_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - pr_source_id bigint, - repo_id bigint, - pr_reviewer_src_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid -); - - -ALTER TABLE augur_data.pull_request_reviewers OWNER TO augur; - --- --- Name: COLUMN pull_request_reviewers.pr_source_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_reviewers.pr_source_id IS 'The platform ID for the pull/merge request. Used as part of the natural key, along with pr_reviewer_src_id in this table. '; - - --- --- Name: COLUMN pull_request_reviewers.pr_reviewer_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_request_reviewers.pr_reviewer_src_id IS 'The platform ID for the pull/merge request reviewer. 
Used as part of the natural key, along with pr_source_id in this table. '; - - --- --- Name: pull_request_reviews_pr_review_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_reviews_pr_review_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_reviews_pr_review_id_seq OWNER TO augur; - --- --- Name: pull_request_reviews; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_reviews ( - pr_review_id bigint DEFAULT nextval('augur_data.pull_request_reviews_pr_review_id_seq'::regclass) NOT NULL, - pull_request_id bigint NOT NULL, - repo_id bigint, - pr_review_author_association character varying, - pr_review_state character varying, - pr_review_body character varying, - pr_review_submitted_at timestamp(6) without time zone, - pr_review_src_id bigint, - pr_review_node_id character varying, - pr_review_html_url character varying, - pr_review_pull_request_url character varying, - pr_review_commit_id character varying, - platform_id bigint DEFAULT 25150, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - cntrb_id uuid NOT NULL -); - - -ALTER TABLE augur_data.pull_request_reviews OWNER TO augur; - --- --- Name: pull_request_teams_pr_team_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_request_teams_pr_team_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_request_teams_pr_team_id_seq OWNER TO augur; - --- --- Name: pull_request_teams; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_request_teams ( - pr_team_id bigint DEFAULT nextval('augur_data.pull_request_teams_pr_team_id_seq'::regclass) NOT NULL, - pull_request_id bigint, - pr_src_team_id bigint, - pr_src_team_node character varying, - pr_src_team_url character varying, - pr_team_name character varying, - pr_team_slug character varying, - pr_team_description character varying, - pr_team_privacy character varying, - pr_team_permission character varying, - pr_team_src_members_url character varying, - pr_team_src_repositories_url character varying, - pr_team_parent_id bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.pull_request_teams OWNER TO augur; - --- --- Name: pull_requests_pull_request_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.pull_requests_pull_request_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.pull_requests_pull_request_id_seq OWNER TO augur; - --- --- Name: pull_requests; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.pull_requests ( - pull_request_id bigint DEFAULT nextval('augur_data.pull_requests_pull_request_id_seq'::regclass) NOT NULL, - repo_id bigint DEFAULT 0, - pr_url character varying, - pr_src_id bigint, - pr_src_node_id character varying, - pr_html_url character varying, - pr_diff_url character varying, - pr_patch_url character varying, - pr_issue_url character varying, - pr_augur_issue_id bigint, - pr_src_number bigint, - pr_src_state character varying, - pr_src_locked boolean, - 
pr_src_title character varying, - pr_body text, - pr_created_at timestamp(0) without time zone, - pr_updated_at timestamp(0) without time zone, - pr_closed_at timestamp(0) without time zone, - pr_merged_at timestamp(0) without time zone, - pr_merge_commit_sha character varying, - pr_teams bigint, - pr_milestone character varying, - pr_commits_url character varying, - pr_review_comments_url character varying, - pr_review_comment_url character varying, - pr_comments_url character varying, - pr_statuses_url character varying, - pr_meta_head_id character varying, - pr_meta_base_id character varying, - pr_src_issue_url character varying, - pr_src_comments_url character varying, - pr_src_review_comments_url character varying, - pr_src_commits_url character varying, - pr_src_statuses_url character varying, - pr_src_author_association character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - pr_augur_contributor_id uuid -); - - -ALTER TABLE augur_data.pull_requests OWNER TO augur; - --- --- Name: COLUMN pull_requests.pr_src_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_src_id IS 'The pr_src_id is unique across all of github.'; - - --- --- Name: COLUMN pull_requests.pr_augur_issue_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_augur_issue_id IS 'This is to link to the augur stored related issue'; - - --- --- Name: COLUMN pull_requests.pr_src_number; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_src_number IS 'The pr_src_number is unique within a repository.'; - - --- --- Name: COLUMN pull_requests.pr_teams; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_teams IS 'One to many with pull request teams. '; - - --- --- Name: COLUMN pull_requests.pr_review_comment_url; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_review_comment_url IS 'This is a field with limited utility. It does expose how to access a specific comment if needed with parameters. If the source changes URL structure, it may be useful'; - - --- --- Name: COLUMN pull_requests.pr_meta_head_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_meta_head_id IS 'The metadata for the head repo that links to the pull_request_meta table. '; - - --- --- Name: COLUMN pull_requests.pr_meta_base_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_meta_base_id IS 'The metadata for the base repo that links to the pull_request_meta table. '; - - --- --- Name: COLUMN pull_requests.pr_augur_contributor_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.pull_requests.pr_augur_contributor_id IS 'This is to link to the augur contributor record. 
'; - - --- --- Name: releases_release_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.releases_release_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.releases_release_id_seq OWNER TO augur; - --- --- Name: releases; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.releases ( - release_id character(64) DEFAULT nextval('augur_data.releases_release_id_seq'::regclass) NOT NULL, - repo_id bigint NOT NULL, - release_name character varying, - release_description character varying, - release_author character varying, - release_created_at timestamp(6) without time zone, - release_published_at timestamp(6) without time zone, - release_updated_at timestamp(6) without time zone, - release_is_draft boolean, - release_is_prerelease boolean, - release_tag_name character varying, - release_url character varying, - tag_only boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.releases OWNER TO augur; - --- --- Name: repo_repo_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_repo_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_repo_id_seq OWNER TO augur; - --- --- Name: repo; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo ( - repo_id bigint DEFAULT nextval('augur_data.repo_repo_id_seq'::regclass) NOT NULL, - repo_group_id bigint NOT NULL, - repo_git character varying NOT NULL, - repo_path character varying DEFAULT 'NULL'::character varying, - repo_name character varying DEFAULT 'NULL'::character varying, - repo_added timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - repo_status character varying DEFAULT 'New'::character varying NOT NULL, - repo_type character varying DEFAULT ''::character varying, - url character varying, - owner_id integer, - description character varying, - primary_language character varying, - created_at character varying, - forked_from character varying, - updated_at timestamp(0) without time zone, - repo_archived_date_collected timestamp(0) with time zone, - repo_archived integer, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo OWNER TO augur; - --- --- Name: TABLE repo; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo IS 'This table is a combination of the columns in Facade’s repo table and GHTorrent’s projects table. '; - - --- --- Name: COLUMN repo.repo_type; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo.repo_type IS 'This field is intended to indicate if the repository is the "main instance" of a repository in cases where implementations choose to add the same repository to more than one repository group. In cases where the repository group is of rg_type Github Organization then this repo_type should be "primary". In other cases the repo_type should probably be "user created". We made this a varchar in order to hold open the possibility that there are additional repo_types we have not thought about. 
'; - - --- --- Name: repo_badging_badge_collection_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_badging_badge_collection_id_seq - START WITH 25012 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_badging_badge_collection_id_seq OWNER TO augur; - --- --- Name: repo_badging; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_badging ( - badge_collection_id bigint DEFAULT nextval('augur_data.repo_badging_badge_collection_id_seq'::regclass) NOT NULL, - repo_id bigint, - created_at timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - data jsonb -); - - -ALTER TABLE augur_data.repo_badging OWNER TO augur; - --- --- Name: TABLE repo_badging; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_badging IS 'This will be collected from the LF’s Badging API -https://bestpractices.coreinfrastructure.org/projects.json?pq=https%3A%2F%2Fgithub.com%2Fchaoss%2Faugur -'; - - --- --- Name: repo_cluster_messages_msg_cluster_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_cluster_messages_msg_cluster_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_cluster_messages_msg_cluster_id_seq OWNER TO augur; - --- --- Name: repo_cluster_messages; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_cluster_messages ( - msg_cluster_id bigint DEFAULT nextval('augur_data.repo_cluster_messages_msg_cluster_id_seq'::regclass) NOT NULL, - repo_id bigint, - cluster_content integer, - cluster_mechanism integer, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_cluster_messages OWNER TO augur; - --- --- Name: repo_dependencies_repo_dependencies_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_dependencies_repo_dependencies_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_dependencies_repo_dependencies_id_seq OWNER TO augur; - --- --- Name: repo_dependencies; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_dependencies ( - repo_dependencies_id bigint DEFAULT nextval('augur_data.repo_dependencies_repo_dependencies_id_seq'::regclass) NOT NULL, - repo_id bigint, - dep_name character varying, - dep_count integer, - dep_language character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_dependencies OWNER TO augur; - --- --- Name: TABLE repo_dependencies; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_dependencies IS 'Contains the dependencies for a repo.'; - - --- --- Name: COLUMN repo_dependencies.repo_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_dependencies.repo_id IS 'Forign key for repo id. 
'; - - --- --- Name: COLUMN repo_dependencies.dep_name; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_dependencies.dep_name IS 'Name of the dependancy found in project. '; - - --- --- Name: COLUMN repo_dependencies.dep_count; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_dependencies.dep_count IS 'Number of times the dependancy was found. '; - - --- --- Name: COLUMN repo_dependencies.dep_language; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_dependencies.dep_language IS 'Language of the dependancy. '; - - --- --- Name: repo_deps_libyear_repo_deps_libyear_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_deps_libyear_repo_deps_libyear_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_deps_libyear_repo_deps_libyear_id_seq OWNER TO augur; - --- --- Name: repo_deps_libyear; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_deps_libyear ( - repo_deps_libyear_id bigint DEFAULT nextval('augur_data.repo_deps_libyear_repo_deps_libyear_id_seq'::regclass) NOT NULL, - repo_id bigint, - name character varying, - requirement character varying, - type character varying, - package_manager character varying, - current_verion character varying, - latest_version character varying, - current_release_date character varying, - latest_release_date character varying, - libyear double precision, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_deps_libyear OWNER TO augur; - --- --- Name: repo_deps_scorecard_repo_deps_scorecard_id_seq1; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_deps_scorecard_repo_deps_scorecard_id_seq1 - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_deps_scorecard_repo_deps_scorecard_id_seq1 OWNER TO augur; - --- --- Name: repo_deps_scorecard; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_deps_scorecard ( - repo_deps_scorecard_id bigint DEFAULT nextval('augur_data.repo_deps_scorecard_repo_deps_scorecard_id_seq1'::regclass) NOT NULL, - repo_id bigint, - name character varying, - status character varying, - score character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_deps_scorecard OWNER TO augur; - --- --- Name: repo_group_insights_rgi_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_group_insights_rgi_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_group_insights_rgi_id_seq OWNER TO augur; - --- --- Name: repo_group_insights; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_group_insights ( - rgi_id bigint DEFAULT nextval('augur_data.repo_group_insights_rgi_id_seq'::regclass) NOT NULL, - repo_group_id bigint, - rgi_metric character varying, - rgi_value character varying, - cms_id bigint, - rgi_fresh boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - 
data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_group_insights OWNER TO augur; - --- --- Name: TABLE repo_group_insights; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_group_insights IS 'This table is output from an analytical worker inside of Augur. It runs through the different metrics on a REPOSITORY_GROUP and identifies the five to ten most “interesting” metrics as defined by some kind of delta or other factor. The algorithm is going to evolve. - -Worker Design Notes: The idea is that the "insight worker" will scan through a bunch of active metrics or "synthetic metrics" to list the most important insights. '; - - --- --- Name: COLUMN repo_group_insights.rgi_fresh; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_group_insights.rgi_fresh IS 'false if the date is before the statistic that triggered the insight, true if after. This allows us to automatically display only "fresh insights" and avoid displaying "stale insights". The insight worker will populate this table. '; - - --- --- Name: repo_groups_repo_group_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_groups_repo_group_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_groups_repo_group_id_seq OWNER TO augur; - --- --- Name: repo_groups; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_groups ( - repo_group_id bigint DEFAULT nextval('augur_data.repo_groups_repo_group_id_seq'::regclass) NOT NULL, - rg_name character varying NOT NULL, - rg_description character varying DEFAULT 'NULL'::character varying, - rg_website character varying(128) DEFAULT 'NULL'::character varying, - rg_recache smallint DEFAULT 1, - rg_last_modified timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL, - rg_type character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.repo_groups OWNER TO augur; - --- --- Name: TABLE repo_groups; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_groups IS 'rg_type is intended to be either a GitHub Organization or a User Created Repo Group. 
'; - - --- --- Name: repo_groups_list_serve_rgls_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_groups_list_serve_rgls_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_groups_list_serve_rgls_id_seq OWNER TO augur; - --- --- Name: repo_groups_list_serve; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_groups_list_serve ( - rgls_id bigint DEFAULT nextval('augur_data.repo_groups_list_serve_rgls_id_seq'::regclass) NOT NULL, - repo_group_id bigint NOT NULL, - rgls_name character varying, - rgls_description character varying(3000), - rgls_sponsor character varying, - rgls_email character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.repo_groups_list_serve OWNER TO augur; - --- --- Name: repo_info_repo_info_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_info_repo_info_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_info_repo_info_id_seq OWNER TO augur; - --- --- Name: repo_info; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_info ( - repo_info_id bigint DEFAULT nextval('augur_data.repo_info_repo_info_id_seq'::regclass) NOT NULL, - repo_id bigint NOT NULL, - last_updated timestamp(0) without time zone DEFAULT NULL::timestamp without time zone, - issues_enabled character varying, - open_issues integer, - pull_requests_enabled character varying, - wiki_enabled character varying, - pages_enabled character varying, - fork_count integer, - default_branch character varying, - watchers_count integer, - "UUID" integer, - license character varying, - stars_count integer, - committers_count integer, - issue_contributors_count character varying, - changelog_file character varying, - contributing_file character varying, - license_file character varying, - code_of_conduct_file character varying, - security_issue_file character varying, - security_audit_file character varying, - status character varying, - keywords character varying, - commit_count bigint, - issues_count bigint, - issues_closed bigint, - pull_request_count bigint, - pull_requests_open bigint, - pull_requests_closed bigint, - pull_requests_merged bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_info OWNER TO augur; - --- --- Name: repo_insights_ri_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_insights_ri_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_insights_ri_id_seq OWNER TO augur; - --- --- Name: repo_insights; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_insights ( - ri_id bigint DEFAULT nextval('augur_data.repo_insights_ri_id_seq'::regclass) NOT NULL, - repo_id bigint, - ri_metric character varying, - ri_value character varying, - ri_date timestamp(0) without time zone, - ri_fresh boolean, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - ri_score 
numeric, - ri_field character varying, - ri_detection_method character varying -); - - -ALTER TABLE augur_data.repo_insights OWNER TO augur; - --- --- Name: TABLE repo_insights; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_insights IS 'This table is output from an analytical worker inside of Augur. It runs through the different metrics on a repository and identifies the five to ten most “interesting” metrics as defined by some kind of delta or other factor. The algorithm is going to evolve. - -Worker Design Notes: The idea is that the "insight worker" will scan through a bunch of active metrics or "synthetic metrics" to list the most important insights. '; - - --- --- Name: COLUMN repo_insights.ri_fresh; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights.ri_fresh IS 'false if the date is before the statistic that triggered the insight, true if after. This allows us to automatically display only "fresh insights" and avoid displaying "stale insights". The insight worker will populate this table. '; - - --- --- Name: repo_insights_records_ri_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_insights_records_ri_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_insights_records_ri_id_seq OWNER TO augur; - --- --- Name: repo_insights_records; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_insights_records ( - ri_id bigint DEFAULT nextval('augur_data.repo_insights_records_ri_id_seq'::regclass) NOT NULL, - repo_id bigint, - ri_metric character varying, - ri_field character varying, - ri_value character varying, - ri_date timestamp(6) without time zone, - ri_score double precision, - ri_detection_method character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(6) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_insights_records OWNER TO augur; - --- --- Name: COLUMN repo_insights_records.ri_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_id IS 'Primary key. '; - - --- --- Name: COLUMN repo_insights_records.repo_id; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.repo_id IS 'Refers to repo table primary key. Will have a foreign key'; - - --- --- Name: COLUMN repo_insights_records.ri_metric; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_metric IS 'The metric endpoint'; - - --- --- Name: COLUMN repo_insights_records.ri_field; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_field IS 'The field in the metric endpoint'; - - --- --- Name: COLUMN repo_insights_records.ri_value; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_value IS 'The value of the endpoint in ri_field'; - - --- --- Name: COLUMN repo_insights_records.ri_date; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_date IS 'The date the insight is for; in other words, some anomaly occurred on this date. 
'; - - --- --- Name: COLUMN repo_insights_records.ri_score; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_score IS 'A Score, derived from the algorithm used. '; - - --- --- Name: COLUMN repo_insights_records.ri_detection_method; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.ri_detection_method IS 'A confidence interval or other expression of the type of threshold and the value of a threshold met in order for it to be "an insight". Example. "95% confidence interval". '; - - --- --- Name: COLUMN repo_insights_records.tool_source; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.tool_source IS 'Standard Augur Metadata'; - - --- --- Name: COLUMN repo_insights_records.tool_version; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.tool_version IS 'Standard Augur Metadata'; - - --- --- Name: COLUMN repo_insights_records.data_source; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.data_source IS 'Standard Augur Metadata'; - - --- --- Name: COLUMN repo_insights_records.data_collection_date; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_insights_records.data_collection_date IS 'Standard Augur Metadata'; - - --- --- Name: repo_labor_repo_labor_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_labor_repo_labor_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_labor_repo_labor_id_seq OWNER TO augur; - --- --- Name: repo_labor; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_labor ( - repo_labor_id bigint DEFAULT nextval('augur_data.repo_labor_repo_labor_id_seq'::regclass) NOT NULL, - repo_id bigint, - repo_clone_date timestamp(0) without time zone, - rl_analysis_date timestamp(0) without time zone, - programming_language character varying, - file_path character varying, - file_name character varying, - total_lines integer, - code_lines integer, - comment_lines integer, - blank_lines integer, - code_complexity integer, - repo_url character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.repo_labor OWNER TO augur; - --- --- Name: TABLE repo_labor; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_labor IS 'repo_labor is a derivative of tables used to store scc code and complexity counting statistics that are inputs to labor analysis, which are components of CHAOSS value metric calculations. 
'; - - --- --- Name: COLUMN repo_labor.repo_url; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON COLUMN augur_data.repo_labor.repo_url IS 'This is a convenience column to simplify analysis against external datasets'; - - --- --- Name: repo_meta_rmeta_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_meta_rmeta_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_meta_rmeta_id_seq OWNER TO augur; - --- --- Name: repo_meta; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_meta ( - repo_id bigint NOT NULL, - rmeta_id bigint DEFAULT nextval('augur_data.repo_meta_rmeta_id_seq'::regclass) NOT NULL, - rmeta_name character varying, - rmeta_value character varying DEFAULT 0, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.repo_meta OWNER TO augur; - --- --- Name: TABLE repo_meta; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_meta IS 'Project Languages'; - - --- --- Name: repo_sbom_scans_rsb_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_sbom_scans_rsb_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_sbom_scans_rsb_id_seq OWNER TO augur; - --- --- Name: repo_sbom_scans; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_sbom_scans ( - rsb_id bigint DEFAULT nextval('augur_data.repo_sbom_scans_rsb_id_seq'::regclass) NOT NULL, - repo_id bigint, - sbom_scan json -); - - -ALTER TABLE augur_data.repo_sbom_scans OWNER TO augur; - --- --- Name: repo_stats_rstat_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_stats_rstat_id_seq - START WITH 25430 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_stats_rstat_id_seq OWNER TO augur; - --- --- Name: repo_stats; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_stats ( - repo_id bigint NOT NULL, - rstat_id bigint DEFAULT nextval('augur_data.repo_stats_rstat_id_seq'::regclass) NOT NULL, - rstat_name character varying(400), - rstat_value bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone -); - - -ALTER TABLE augur_data.repo_stats OWNER TO augur; - --- --- Name: TABLE repo_stats; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON TABLE augur_data.repo_stats IS 'Project Watchers'; - - --- --- Name: repo_test_coverage_repo_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_test_coverage_repo_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_test_coverage_repo_id_seq OWNER TO augur; - --- --- Name: repo_test_coverage; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_test_coverage ( - repo_id bigint DEFAULT nextval('augur_data.repo_test_coverage_repo_id_seq'::regclass) NOT NULL, - repo_clone_date timestamp(0) without time zone, - rtc_analysis_date timestamp(0) without time zone, - programming_language character varying, - file_path character varying, - file_name character varying, - testing_tool character varying, - 
file_statement_count bigint, - file_subroutine_count bigint, - file_statements_tested bigint, - file_subroutines_tested bigint, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_test_coverage OWNER TO augur; - --- --- Name: repo_topic_repo_topic_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.repo_topic_repo_topic_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.repo_topic_repo_topic_id_seq OWNER TO augur; - --- --- Name: repo_topic; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repo_topic ( - repo_topic_id bigint DEFAULT nextval('augur_data.repo_topic_repo_topic_id_seq'::regclass) NOT NULL, - repo_id bigint, - topic_id integer, - topic_prob double precision, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.repo_topic OWNER TO augur; - --- --- Name: repos_fetch_log; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.repos_fetch_log ( - repos_id integer NOT NULL, - status character varying(128) NOT NULL, - date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_data.repos_fetch_log OWNER TO augur; - --- --- Name: settings; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.settings ( - id integer NOT NULL, - setting character varying(32) NOT NULL, - value character varying NOT NULL, - last_modified timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_data.settings OWNER TO augur; - --- --- Name: topic_words_topic_words_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.topic_words_topic_words_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.topic_words_topic_words_id_seq OWNER TO augur; - --- --- Name: topic_words; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.topic_words ( - topic_words_id bigint DEFAULT nextval('augur_data.topic_words_topic_words_id_seq'::regclass) NOT NULL, - topic_id bigint, - word character varying, - word_prob double precision, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.topic_words OWNER TO augur; - --- --- Name: unknown_cache; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.unknown_cache ( - type character varying(10) NOT NULL, - repo_group_id integer NOT NULL, - email character varying(128) NOT NULL, - domain character varying(128) DEFAULT 'NULL'::character varying, - added bigint NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.unknown_cache OWNER TO augur; - --- --- Name: unresolved_commit_emails_email_unresolved_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.unresolved_commit_emails_email_unresolved_id_seq - START WITH 1 - INCREMENT BY 1 - NO 
MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.unresolved_commit_emails_email_unresolved_id_seq OWNER TO augur; - --- --- Name: unresolved_commit_emails; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.unresolved_commit_emails ( - email_unresolved_id bigint DEFAULT nextval('augur_data.unresolved_commit_emails_email_unresolved_id_seq'::regclass) NOT NULL, - email character varying NOT NULL, - name character varying, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE augur_data.unresolved_commit_emails OWNER TO augur; - --- --- Name: utility_log_id_seq1; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.utility_log_id_seq1 - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.utility_log_id_seq1 OWNER TO augur; - --- --- Name: utility_log; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.utility_log ( - id bigint DEFAULT nextval('augur_data.utility_log_id_seq1'::regclass) NOT NULL, - level character varying(8) NOT NULL, - status character varying NOT NULL, - attempted timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_data.utility_log OWNER TO augur; - --- --- Name: utility_log_id_seq; Type: SEQUENCE; Schema: augur_data; Owner: augur --- - -CREATE SEQUENCE augur_data.utility_log_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_data.utility_log_id_seq OWNER TO augur; - --- --- Name: working_commits; Type: TABLE; Schema: augur_data; Owner: augur --- - -CREATE TABLE augur_data.working_commits ( - repos_id integer NOT NULL, - working_commit character varying(40) DEFAULT 'NULL'::character varying -); - - -ALTER TABLE augur_data.working_commits OWNER TO augur; - --- --- Name: affiliations_corp_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.affiliations_corp_id_seq - START WITH 620000 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.affiliations_corp_id_seq OWNER TO augur; - --- --- Name: all; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations."all" ( - "Name" character varying, - "Bytes" character varying, - "Lines" character varying, - "Code" character varying, - "Comment" character varying, - "Blank" character varying, - "Complexity" character varying, - "Count" character varying, - "WeightedComplexity" character varying, - "Files" character varying -); - - -ALTER TABLE augur_operations."all" OWNER TO augur; - --- --- Name: augur_settings_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.augur_settings_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.augur_settings_id_seq OWNER TO augur; - --- --- Name: augur_settings; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.augur_settings ( - id bigint DEFAULT nextval('augur_operations.augur_settings_id_seq'::regclass) NOT NULL, - setting character varying, - value character varying, - last_modified timestamp(0) without time zone DEFAULT CURRENT_DATE -); - - -ALTER TABLE augur_operations.augur_settings OWNER TO augur; - --- --- Name: TABLE augur_settings; Type: COMMENT; Schema: 
augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.augur_settings IS 'Augur settings include the schema version, and the Augur API Key as of 10/25/2020. Future augur settings may be stored in this table, which has the basic structure of a name-value pair. '; - - --- --- Name: config; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.config ( - id smallint NOT NULL, - section_name character varying NOT NULL, - setting_name character varying NOT NULL, - value character varying, - type character varying -); - - -ALTER TABLE augur_operations.config OWNER TO augur; - --- --- Name: config_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.config_id_seq - AS smallint - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.config_id_seq OWNER TO augur; - --- --- Name: config_id_seq; Type: SEQUENCE OWNED BY; Schema: augur_operations; Owner: augur --- - -ALTER SEQUENCE augur_operations.config_id_seq OWNED BY augur_operations.config.id; - - --- --- Name: gh_worker_history_history_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.gh_worker_history_history_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.gh_worker_history_history_id_seq OWNER TO augur; - --- --- Name: repos_fetch_log; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.repos_fetch_log ( - repos_id integer NOT NULL, - status character varying(128) NOT NULL, - date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_operations.repos_fetch_log OWNER TO augur; - --- --- Name: TABLE repos_fetch_log; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.repos_fetch_log IS 'For future use when we move all working tables to the augur_operations schema. 
'; - - --- --- Name: users; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.users ( - user_id integer NOT NULL, - login_name character varying NOT NULL, - login_hashword character varying NOT NULL, - email character varying NOT NULL, - text_phone character varying, - first_name character varying NOT NULL, - last_name character varying NOT NULL, - tool_source character varying, - tool_version character varying, - data_source character varying, - data_collection_date timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP, - admin boolean NOT NULL -); - - -ALTER TABLE augur_operations.users OWNER TO augur; - --- --- Name: users_user_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.users_user_id_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.users_user_id_seq OWNER TO augur; - --- --- Name: users_user_id_seq; Type: SEQUENCE OWNED BY; Schema: augur_operations; Owner: augur --- - -ALTER SEQUENCE augur_operations.users_user_id_seq OWNED BY augur_operations.users.user_id; - - --- --- Name: worker_history; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.worker_history ( - history_id bigint DEFAULT nextval('augur_operations.gh_worker_history_history_id_seq'::regclass) NOT NULL, - repo_id bigint, - worker character varying(255) NOT NULL, - job_model character varying(255) NOT NULL, - oauth_id integer, - "timestamp" timestamp(0) without time zone NOT NULL, - status character varying(7) NOT NULL, - total_results integer -); - - -ALTER TABLE augur_operations.worker_history OWNER TO augur; - --- --- Name: TABLE worker_history; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.worker_history IS 'This table stores the complete history of job execution, including success and failure. It is useful for troubleshooting. '; - - --- --- Name: worker_job; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.worker_job ( - job_model character varying(255) NOT NULL, - state integer DEFAULT 0 NOT NULL, - zombie_head integer, - since_id_str character varying(255) DEFAULT '0'::character varying NOT NULL, - description character varying(255) DEFAULT 'None'::character varying, - last_count integer, - last_run timestamp(0) without time zone DEFAULT NULL::timestamp without time zone, - analysis_state integer DEFAULT 0, - oauth_id integer NOT NULL -); - - -ALTER TABLE augur_operations.worker_job OWNER TO augur; - --- --- Name: TABLE worker_job; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.worker_job IS 'This table stores the jobs workers collect data for. A job is found in the code, and in the augur.config.json under the construct of a “model”. 
'; - - --- --- Name: worker_oauth_oauth_id_seq; Type: SEQUENCE; Schema: augur_operations; Owner: augur --- - -CREATE SEQUENCE augur_operations.worker_oauth_oauth_id_seq - START WITH 1000 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER TABLE augur_operations.worker_oauth_oauth_id_seq OWNER TO augur; - --- --- Name: worker_oauth; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.worker_oauth ( - oauth_id bigint DEFAULT nextval('augur_operations.worker_oauth_oauth_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL, - consumer_key character varying(255) NOT NULL, - consumer_secret character varying(255) NOT NULL, - access_token character varying(255) NOT NULL, - access_token_secret character varying(255) NOT NULL, - repo_directory character varying, - platform character varying DEFAULT 'github'::character varying -); - - -ALTER TABLE augur_operations.worker_oauth OWNER TO augur; - --- --- Name: TABLE worker_oauth; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.worker_oauth IS 'This table stores credentials for retrieving data from platform API’s. Entries in this table must comply with the terms of service for each platform. '; - - --- --- Name: worker_settings_facade; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.worker_settings_facade ( - id integer NOT NULL, - setting character varying(32) NOT NULL, - value character varying NOT NULL, - last_modified timestamp(0) without time zone DEFAULT CURRENT_TIMESTAMP NOT NULL -); - - -ALTER TABLE augur_operations.worker_settings_facade OWNER TO augur; - --- --- Name: TABLE worker_settings_facade; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.worker_settings_facade IS 'For future use when we move all working tables to the augur_operations schema. '; - - --- --- Name: working_commits; Type: TABLE; Schema: augur_operations; Owner: augur --- - -CREATE TABLE augur_operations.working_commits ( - repos_id integer NOT NULL, - working_commit character varying(40) DEFAULT 'NULL'::character varying -); - - -ALTER TABLE augur_operations.working_commits OWNER TO augur; - --- --- Name: TABLE working_commits; Type: COMMENT; Schema: augur_operations; Owner: augur --- - -COMMENT ON TABLE augur_operations.working_commits IS 'For future use when we move all working tables to the augur_operations schema. 
'; - - --- --- Name: alembic_version; Type: TABLE; Schema: public; Owner: augur --- - -CREATE TABLE public.alembic_version ( - version_num character varying(32) NOT NULL -); - - -ALTER TABLE public.alembic_version OWNER TO augur; - --- --- Name: annotation_types_annotation_type_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.annotation_types_annotation_type_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.annotation_types_annotation_type_id_seq OWNER TO augur; - --- --- Name: annotation_types; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.annotation_types ( - annotation_type_id integer DEFAULT nextval('spdx.annotation_types_annotation_type_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL -); - - -ALTER TABLE spdx.annotation_types OWNER TO augur; - --- --- Name: annotations_annotation_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.annotations_annotation_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.annotations_annotation_id_seq OWNER TO augur; - --- --- Name: annotations; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.annotations ( - annotation_id integer DEFAULT nextval('spdx.annotations_annotation_id_seq'::regclass) NOT NULL, - document_id integer NOT NULL, - annotation_type_id integer NOT NULL, - identifier_id integer NOT NULL, - creator_id integer NOT NULL, - created_ts timestamp(6) with time zone, - comment text NOT NULL -); - - -ALTER TABLE spdx.annotations OWNER TO augur; - --- --- Name: augur_repo_map_map_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.augur_repo_map_map_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.augur_repo_map_map_id_seq OWNER TO augur; - --- --- Name: augur_repo_map; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.augur_repo_map ( - map_id integer DEFAULT nextval('spdx.augur_repo_map_map_id_seq'::regclass) NOT NULL, - dosocs_pkg_id integer, - dosocs_pkg_name text, - repo_id integer, - repo_path text -); - - -ALTER TABLE spdx.augur_repo_map OWNER TO augur; - --- --- Name: creator_types_creator_type_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.creator_types_creator_type_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.creator_types_creator_type_id_seq OWNER TO augur; - --- --- Name: creator_types; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.creator_types ( - creator_type_id integer DEFAULT nextval('spdx.creator_types_creator_type_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL -); - - -ALTER TABLE spdx.creator_types OWNER TO augur; - --- --- Name: creators_creator_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.creators_creator_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.creators_creator_id_seq OWNER TO augur; - --- --- Name: creators; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.creators ( - creator_id integer DEFAULT nextval('spdx.creators_creator_id_seq'::regclass) NOT NULL, - creator_type_id integer NOT NULL, - name character varying(255) NOT NULL, - email character varying(255) NOT NULL -); - - -ALTER TABLE spdx.creators OWNER TO augur; - --- --- Name: 
document_namespaces_document_namespace_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.document_namespaces_document_namespace_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.document_namespaces_document_namespace_id_seq OWNER TO augur; - --- --- Name: document_namespaces; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.document_namespaces ( - document_namespace_id integer DEFAULT nextval('spdx.document_namespaces_document_namespace_id_seq'::regclass) NOT NULL, - uri character varying(500) NOT NULL -); - - -ALTER TABLE spdx.document_namespaces OWNER TO augur; - --- --- Name: documents_document_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.documents_document_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.documents_document_id_seq OWNER TO augur; - --- --- Name: documents; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.documents ( - document_id integer DEFAULT nextval('spdx.documents_document_id_seq'::regclass) NOT NULL, - document_namespace_id integer NOT NULL, - data_license_id integer NOT NULL, - spdx_version character varying(255) NOT NULL, - name character varying(255) NOT NULL, - license_list_version character varying(255) NOT NULL, - created_ts timestamp(6) with time zone NOT NULL, - creator_comment text NOT NULL, - document_comment text NOT NULL, - package_id integer NOT NULL -); - - -ALTER TABLE spdx.documents OWNER TO augur; - --- --- Name: documents_creators_document_creator_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.documents_creators_document_creator_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.documents_creators_document_creator_id_seq OWNER TO augur; - --- --- Name: documents_creators; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.documents_creators ( - document_creator_id integer DEFAULT nextval('spdx.documents_creators_document_creator_id_seq'::regclass) NOT NULL, - document_id integer NOT NULL, - creator_id integer NOT NULL -); - - -ALTER TABLE spdx.documents_creators OWNER TO augur; - --- --- Name: external_refs_external_ref_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.external_refs_external_ref_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.external_refs_external_ref_id_seq OWNER TO augur; - --- --- Name: external_refs; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.external_refs ( - external_ref_id integer DEFAULT nextval('spdx.external_refs_external_ref_id_seq'::regclass) NOT NULL, - document_id integer NOT NULL, - document_namespace_id integer NOT NULL, - id_string character varying(255) NOT NULL, - sha256 character varying(64) NOT NULL -); - - -ALTER TABLE spdx.external_refs OWNER TO augur; - --- --- Name: file_contributors_file_contributor_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.file_contributors_file_contributor_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.file_contributors_file_contributor_id_seq OWNER TO augur; - --- --- Name: file_contributors; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.file_contributors ( - file_contributor_id integer DEFAULT 
nextval('spdx.file_contributors_file_contributor_id_seq'::regclass) NOT NULL, - file_id integer NOT NULL, - contributor text NOT NULL -); - - -ALTER TABLE spdx.file_contributors OWNER TO augur; - --- --- Name: file_types; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.file_types ( - file_type_id integer, - name character varying(255) NOT NULL -); - - -ALTER TABLE spdx.file_types OWNER TO augur; - --- --- Name: file_types_file_type_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.file_types_file_type_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.file_types_file_type_id_seq OWNER TO augur; - --- --- Name: files_file_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.files_file_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.files_file_id_seq OWNER TO augur; - --- --- Name: files; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.files ( - file_id integer DEFAULT nextval('spdx.files_file_id_seq'::regclass) NOT NULL, - file_type_id integer, - sha256 character varying(64) NOT NULL, - copyright_text text, - package_id integer, - comment text NOT NULL, - notice text NOT NULL -); - - -ALTER TABLE spdx.files OWNER TO augur; - --- --- Name: files_licenses_file_license_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.files_licenses_file_license_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.files_licenses_file_license_id_seq OWNER TO augur; - --- --- Name: files_licenses; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.files_licenses ( - file_license_id integer DEFAULT nextval('spdx.files_licenses_file_license_id_seq'::regclass) NOT NULL, - file_id integer NOT NULL, - license_id integer NOT NULL, - extracted_text text NOT NULL -); - - -ALTER TABLE spdx.files_licenses OWNER TO augur; - --- --- Name: files_scans_file_scan_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.files_scans_file_scan_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.files_scans_file_scan_id_seq OWNER TO augur; - --- --- Name: files_scans; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.files_scans ( - file_scan_id integer DEFAULT nextval('spdx.files_scans_file_scan_id_seq'::regclass) NOT NULL, - file_id integer NOT NULL, - scanner_id integer NOT NULL -); - - -ALTER TABLE spdx.files_scans OWNER TO augur; - --- --- Name: identifiers_identifier_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.identifiers_identifier_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.identifiers_identifier_id_seq OWNER TO augur; - --- --- Name: identifiers; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.identifiers ( - identifier_id integer DEFAULT nextval('spdx.identifiers_identifier_id_seq'::regclass) NOT NULL, - document_namespace_id integer NOT NULL, - id_string character varying(255) NOT NULL, - document_id integer, - package_id integer, - package_file_id integer, - CONSTRAINT ck_identifier_exactly_one CHECK ((((((document_id IS NOT NULL))::integer + ((package_id IS NOT NULL))::integer) + ((package_file_id IS NOT NULL))::integer) = 1)) -); - - -ALTER TABLE spdx.identifiers OWNER TO augur; - --- --- Name: 
licenses_license_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.licenses_license_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.licenses_license_id_seq OWNER TO augur; - --- --- Name: licenses; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.licenses ( - license_id integer DEFAULT nextval('spdx.licenses_license_id_seq'::regclass) NOT NULL, - name character varying(255), - short_name character varying(255) NOT NULL, - cross_reference text NOT NULL, - comment text NOT NULL, - is_spdx_official boolean NOT NULL -); - - -ALTER TABLE spdx.licenses OWNER TO augur; - --- --- Name: packages_package_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.packages_package_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.packages_package_id_seq OWNER TO augur; - --- --- Name: packages; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.packages ( - package_id integer DEFAULT nextval('spdx.packages_package_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL, - version character varying(255) NOT NULL, - file_name text NOT NULL, - supplier_id integer, - originator_id integer, - download_location text, - verification_code character varying(64) NOT NULL, - ver_code_excluded_file_id integer, - sha256 character varying(64), - home_page text, - source_info text NOT NULL, - concluded_license_id integer, - declared_license_id integer, - license_comment text NOT NULL, - copyright_text text, - summary text NOT NULL, - description text NOT NULL, - comment text NOT NULL, - dosocs2_dir_code character varying(64), - CONSTRAINT uc_sha256_ds2_dir_code_exactly_one CHECK (((((sha256 IS NOT NULL))::integer + ((dosocs2_dir_code IS NOT NULL))::integer) = 1)) -); - - -ALTER TABLE spdx.packages OWNER TO augur; - --- --- Name: packages_files_package_file_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.packages_files_package_file_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.packages_files_package_file_id_seq OWNER TO augur; - --- --- Name: packages_files; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.packages_files ( - package_file_id integer DEFAULT nextval('spdx.packages_files_package_file_id_seq'::regclass) NOT NULL, - package_id integer NOT NULL, - file_id integer NOT NULL, - concluded_license_id integer, - license_comment text NOT NULL, - file_name text NOT NULL -); - - -ALTER TABLE spdx.packages_files OWNER TO augur; - --- --- Name: packages_scans_package_scan_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.packages_scans_package_scan_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.packages_scans_package_scan_id_seq OWNER TO augur; - --- --- Name: packages_scans; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.packages_scans ( - package_scan_id integer DEFAULT nextval('spdx.packages_scans_package_scan_id_seq'::regclass) NOT NULL, - package_id integer NOT NULL, - scanner_id integer NOT NULL -); - - -ALTER TABLE spdx.packages_scans OWNER TO augur; - --- --- Name: projects_package_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.projects_package_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE 
spdx.projects_package_id_seq OWNER TO augur; - --- --- Name: projects; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.projects ( - package_id integer DEFAULT nextval('spdx.projects_package_id_seq'::regclass) NOT NULL, - name text NOT NULL, - homepage text NOT NULL, - uri text NOT NULL -); - - -ALTER TABLE spdx.projects OWNER TO augur; - --- --- Name: relationship_types_relationship_type_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.relationship_types_relationship_type_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.relationship_types_relationship_type_id_seq OWNER TO augur; - --- --- Name: relationship_types; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.relationship_types ( - relationship_type_id integer DEFAULT nextval('spdx.relationship_types_relationship_type_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL -); - - -ALTER TABLE spdx.relationship_types OWNER TO augur; - --- --- Name: relationships_relationship_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.relationships_relationship_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.relationships_relationship_id_seq OWNER TO augur; - --- --- Name: relationships; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.relationships ( - relationship_id integer DEFAULT nextval('spdx.relationships_relationship_id_seq'::regclass) NOT NULL, - left_identifier_id integer NOT NULL, - right_identifier_id integer NOT NULL, - relationship_type_id integer NOT NULL, - relationship_comment text NOT NULL -); - - -ALTER TABLE spdx.relationships OWNER TO augur; - --- --- Name: sbom_scans; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.sbom_scans ( - repo_id integer, - sbom_scan json -); - - -ALTER TABLE spdx.sbom_scans OWNER TO augur; - --- --- Name: scanners_scanner_id_seq; Type: SEQUENCE; Schema: spdx; Owner: augur --- - -CREATE SEQUENCE spdx.scanners_scanner_id_seq - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - MAXVALUE 2147483647 - CACHE 1; - - -ALTER TABLE spdx.scanners_scanner_id_seq OWNER TO augur; - --- --- Name: scanners; Type: TABLE; Schema: spdx; Owner: augur --- - -CREATE TABLE spdx.scanners ( - scanner_id integer DEFAULT nextval('spdx.scanners_scanner_id_seq'::regclass) NOT NULL, - name character varying(255) NOT NULL -); - - -ALTER TABLE spdx.scanners OWNER TO augur; - --- --- Name: chaoss_user chaoss_id; Type: DEFAULT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.chaoss_user ALTER COLUMN chaoss_id SET DEFAULT nextval('augur_data.chaoss_user_chaoss_id_seq'::regclass); - - --- --- Name: config id; Type: DEFAULT; Schema: augur_operations; Owner: augur --- - -ALTER TABLE ONLY augur_operations.config ALTER COLUMN id SET DEFAULT nextval('augur_operations.config_id_seq'::regclass); - - --- --- Name: users user_id; Type: DEFAULT; Schema: augur_operations; Owner: augur --- - -ALTER TABLE ONLY augur_operations.users ALTER COLUMN user_id SET DEFAULT nextval('augur_operations.users_user_id_seq'::regclass); - - --- --- Data for Name: analysis_log; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.analysis_log (repos_id, status, date_attempted) FROM stdin; -\. 
- - --- --- Data for Name: chaoss_metric_status; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.chaoss_metric_status (cms_id, cm_group, cm_source, cm_type, cm_backend_status, cm_frontend_status, cm_defined, cm_api_endpoint_repo, cm_api_endpoint_rg, cm_name, cm_working_group, cm_info, tool_source, tool_version, data_source, data_collection_date, cm_working_group_focus_area) FROM stdin; -2 growth-maturity-decline githubapi timeseries implemented unimplemented t /api/unstable///timeseries/githubapi/issues \N Open Issues growth-maturity-decline "open-issues" Insight Worker 0.0.1 githubapi 2019-06-20 22:41:41 \N -3 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/issues \N Open Issues growth-maturity-decline "open-issues" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:42:15 \N -4 growth-maturity-decline githubapi timeseries implemented unimplemented t /api/unstable///timeseries/githubapi/issues/closed \N Closed Issues growth-maturity-decline "closed-issues" Insight Worker 0.0.1 githubapi 2019-06-20 22:45:53 \N -5 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/issues/closed \N Closed Issues growth-maturity-decline "closed-issues" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:26 \N -6 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/issues/response_time \N First Response To Issue Duration growth-maturity-decline "first-response-to-issue-duration" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:27 \N -7 growth-maturity-decline githubapi timeseries implemented unimplemented t /api/unstable///timeseries/githubapi/commits \N Code Commits growth-maturity-decline "code-commits" Insight Worker 0.0.1 githubapi 2019-06-20 22:49:29 \N -8 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/commits \N Code Commits growth-maturity-decline "code-commits" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:30 \N -9 growth-maturity-decline githubapi metric implemented unimplemented t /api/unstable///lines_changed \N Lines Of Code Changed growth-maturity-decline "lines-of-code-changed" Insight Worker 0.0.1 githubapi 2019-06-20 22:49:32 \N -10 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/pulls/maintainer_response_time \N Maintainer Response To Merge Request Duration growth-maturity-decline "maintainer-response-to-merge-request-duration" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:33 \N -11 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/code_review_iteration \N Code Review Iteration growth-maturity-decline "code-review-iteration" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:35 \N -12 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/forks \N Forks growth-maturity-decline "forks" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:36 \N -13 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/pulls \N Pull Requests Open growth-maturity-decline "pull-requests-open" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:38 \N -14 growth-maturity-decline ghtorrent timeseries implemented unimplemented f /api/unstable///timeseries/pulls/closed \N Pull Requests Closed growth-maturity-decline "pull-requests-closed" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:39 \N -15 growth-maturity-decline ghtorrent timeseries 
implemented unimplemented f /api/unstable///timeseries/pulls/response_time \N Pull Request Comment Duration growth-maturity-decline "pull-request-comment-duration" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:41 \N -16 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/pulls/comments \N Pull Request Comments growth-maturity-decline "pull-request-comments" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:42 \N -17 growth-maturity-decline augur_db metric implemented unimplemented t /api/unstable/repo-groups//repos//contributors \N Contributors growth-maturity-decline "contributors" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:44 \N -18 growth-maturity-decline githubapi metric implemented unimplemented t /api/unstable///githubapi/contributors \N Contributors growth-maturity-decline "contributors" Insight Worker 0.0.1 githubapi 2019-06-20 22:49:45 \N -19 growth-maturity-decline ghtorrent metric implemented implemented t /api/unstable///contributors \N Contributors growth-maturity-decline "contributors" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:47 \N -20 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/community_engagement \N Community Engagement growth-maturity-decline "community-engagement" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:48 \N -21 growth-maturity-decline augur_db metric implemented unimplemented t /api/unstable/repo-groups//repos//sub-projects \N Sub Projects growth-maturity-decline "sub-projects" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:50 \N -22 growth-maturity-decline ghtorrent timeseries implemented implemented t /api/unstable///timeseries/contribution_acceptance \N Contribution Acceptance growth-maturity-decline "contribution-acceptance" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:49:51 \N -23 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//code-changes \N Code Changes experimental "code-changes" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:53 \N -24 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//pull-requests-merge-contributor-new \N Pull Requests Merge Contributor New experimental "pull-requests-merge-contributor-new" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:55 \N -25 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issues-first-time-opened \N Issues First Time Opened experimental "issues-first-time-opened" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:56 \N -26 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issues-first-time-closed \N Issues First Time Closed experimental "issues-first-time-closed" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:58 \N -27 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//contributors-new \N Contributors New experimental "contributors-new" Insight Worker 0.0.1 augur_db 2019-06-20 22:49:59 \N -28 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//code-changes-lines \N Code Changes Lines experimental "code-changes-lines" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:01 \N -29 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issues-new \N Issues New experimental "issues-new" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:02 \N -30 experimental augur_db metric implemented unimplemented f 
/api/unstable/repo-groups//repos//issues-closed \N Issues Closed experimental "issues-closed" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:04 \N -31 experimental augur_db metric implemented unimplemented f none \N Issue Duration experimental "issue-duration" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:05 \N -32 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issue-backlog \N Issue Backlog experimental "issue-backlog" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:07 \N -33 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issues-open-age \N Issues Open Age experimental "issues-open-age" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:08 \N -34 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//issues-closed-resolution-duration \N Issues Closed Resolution Duration experimental "issues-closed-resolution-duration" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:10 \N -35 experimental augur_db metric implemented unimplemented f none \N Lines Changed By Author experimental "lines-changed-by-author" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:11 \N -36 experimental augur_db git implemented unimplemented f /api/unstable/repo-groups \N Repo Groups experimental "repo-groups" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:13 \N -37 experimental augur_db git implemented unimplemented f /api/unstable/repos \N Downloaded Repos experimental "downloaded-repos" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:15 \N -38 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//open-issues-count \N Open Issues Count experimental "closed-issues-count" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:16 \N -39 experimental augur_db metric implemented unimplemented f /api/unstable/repo-groups//repos//closed-issues-count \N Closed Issues Count experimental "closed-issues-count" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:18 \N -40 experimental augur_db git implemented unimplemented f /api/unstable/repos// \N Get Repo experimental "get-repo" Insight Worker 0.0.1 augur_db 2019-06-20 22:50:19 \N -41 experimental downloads timeseries implemented implemented f /api/unstable///timeseries/downloads \N Downloads experimental "downloads" Insight Worker 0.0.1 downloads 2019-06-20 22:50:21 \N -42 experimental githubapi metric implemented unimplemented f /api/unstable///githubapi/pull_requests_closed \N Pull Requests Closed experimental "pull_requests_closed" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:22 \N -43 experimental githubapi metric implemented unimplemented f /api/unstable///githubapi/pull_requests_merged \N Pull Requests Merged experimental "pull_requests_merged" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:24 \N -44 experimental githubapi metric implemented unimplemented f /api/unstable///githubapi/pull_requests_open \N Pull Requests Open experimental "pull_requests_open" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:25 \N -45 experimental githubapi metric implemented unimplemented t /api/unstable///githubapi/repository_size \N Repository Size experimental "repository-size" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:27 \N -46 experimental githubapi metric implemented implemented t /api/unstable///bus_factor \N Bus Factor experimental "bus-factor" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:28 \N -47 experimental githubapi timeseries implemented implemented f /api/unstable///timeseries/tags/major \N Major Tags experimental "major-tags" 
Insight Worker 0.0.1 githubapi 2019-06-20 22:50:30 \N -48 experimental githubapi timeseries implemented implemented f /api/unstable///timeseries/tags \N Tags experimental "tags" Insight Worker 0.0.1 githubapi 2019-06-20 22:50:31 \N -49 experimental facade git implemented unimplemented f /api/unstable/git/repos \N Downloaded Repos experimental "downloaded-repos" Insight Worker 0.0.1 facade 2019-06-20 22:50:33 \N -50 experimental facade git implemented implemented f /api/unstable/git/changes_by_author \N Lines Changed By Author experimental "lines-changed-by-author" Insight Worker 0.0.1 facade 2019-06-20 22:50:35 \N -51 experimental facade git implemented unimplemented f /api/unstable/git/lines_changed_by_week \N Lines Changed By Week experimental "lines-changed-by-week" Insight Worker 0.0.1 facade 2019-06-20 22:50:36 \N -52 experimental facade git implemented unimplemented f /api/unstable/git/lines_changed_by_month \N Lines Changed By Month experimental "lines-changed-by-month" Insight Worker 0.0.1 facade 2019-06-20 22:50:38 \N -53 experimental facade git implemented unimplemented f /api/unstable/git/commits_by_week \N Commits By Week experimental "commits-by-week" Insight Worker 0.0.1 facade 2019-06-20 22:50:40 \N -54 experimental facade git implemented implemented f /api/unstable/git/facade_project \N Facade Project experimental "facade-project" Insight Worker 0.0.1 facade 2019-06-20 22:50:41 \N -55 experimental facade metric implemented unimplemented f none \N Annual Commit Count Ranked By New Repo In Repo Group experimental "annual-commit-count-ranked-by-new-repo-in-repo-group" Insight Worker 0.0.1 facade 2019-06-20 22:50:43 \N -56 experimental facade metric implemented unimplemented f none \N Annual Lines Of Code Count Ranked By New Repo In Repo Group experimental "annual-lines-of-code-count-ranked-by-new-repo-in-repo-group" Insight Worker 0.0.1 facade 2019-06-20 22:50:44 \N -57 experimental facade metric implemented unimplemented f none \N Annual Commit Count Ranked By Repo In Repo Group experimental "annual-commit-count-ranked-by-repo-in-repo-group" Insight Worker 0.0.1 facade 2019-06-20 22:50:46 \N -58 experimental facade metric implemented unimplemented f none \N Annual Lines Of Code Count Ranked By Repo In Repo Group experimental "annual-lines-of-code-count-ranked-by-repo-in-repo-group" Insight Worker 0.0.1 facade 2019-06-20 22:50:48 \N -59 experimental facade metric implemented unimplemented f none \N Lines Of Code Commit Counts By Calendar Year Grouped experimental "lines-of-code-commit-counts-by-calendar-year-grouped" Insight Worker 0.0.1 facade 2019-06-20 22:50:49 \N -60 experimental facade metric implemented unimplemented f none \N Unaffiliated Contributors Lines Of Code Commit Counts By Calendar Year Grouped experimental "unaffiliated-contributors-lines-of-code-commit-counts-by-calendar-year-grouped" Insight Worker 0.0.1 facade 2019-06-20 22:50:51 \N -61 experimental facade metric implemented unimplemented f none \N Repo Group Lines Of Code Commit Counts Calendar Year Grouped experimental "repo-group-lines-of-code-commit-counts-calendar-year-grouped" Insight Worker 0.0.1 facade 2019-06-20 22:50:52 \N -62 experimental ghtorrent metric implemented implemented f /api/unstable///contributing_github_organizations \N Contributing Github Organizations experimental "contributing-github-organizations" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:50:54 \N -63 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/new_contributing_github_organizations \N 
New Contributing Github Organizations experimental "new-contributing-github-organizations" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:50:56 \N -64 experimental ghtorrent timeseries implemented implemented t /api/unstable///timeseries/issue_comments \N Issue Comments experimental "issue-comments" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:50:57 \N -65 experimental ghtorrent timeseries implemented implemented t /api/unstable///timeseries/pulls/made_closed \N Pull Requests Made Closed experimental "pull-requests-made-closed" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:50:59 \N -66 experimental ghtorrent timeseries implemented implemented t /api/unstable///timeseries/watchers \N Watchers experimental "watchers" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:00 \N -67 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/commits100 \N Commits100 experimental "commits100" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:02 \N -68 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/commits/comments \N Commit Comments experimental "commit-comments" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:03 \N -69 experimental ghtorrent metric implemented implemented f /api/unstable///committer_locations \N Committer Locations experimental "committer-locations" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:05 \N -70 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/total_committers \N Total Committers experimental "total-committers" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:07 \N -71 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/issues/activity \N Issue Activity experimental "issue-activity" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:08 \N -72 experimental ghtorrent timeseries implemented unimplemented f /api/unstable///timeseries/pulls/acceptance_rate \N Pull Request Acceptance Rate experimental "pull-request-acceptance-rate" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:10 \N -73 experimental ghtorrent metric implemented implemented f /api/unstable///community_age \N Community Age experimental "community-age" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:11 \N -74 experimental ghtorrent metric implemented unimplemented f /api/unstable///timeseries/contributions \N Contributions experimental "contributions" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:13 \N -75 experimental ghtorrent metric implemented implemented f /api/unstable///project_age \N Project Age experimental "project-age" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:14 \N -76 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/fakes \N Fakes experimental "fakes" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:16 \N -77 experimental ghtorrent timeseries implemented unimplemented f /api/unstable///timeseries/total_watchers \N Total Watchers experimental "total-watchers" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:18 \N -78 experimental ghtorrent timeseries implemented implemented f /api/unstable///timeseries/new_watchers \N New Watchers experimental "new-watchers" Insight Worker 0.0.1 ghtorrent 2019-06-20 22:51:19 \N -79 experimental librariesio metric implemented implemented f /api/unstable///dependencies \N Dependencies experimental "dependencies" Insight Worker 0.0.1 librariesio 2019-06-20 22:51:21 \N -80 experimental librariesio metric implemented implemented f /api/unstable///dependency_stats \N Dependency Stats experimental 
"dependency-stats" Insight Worker 0.0.1 librariesio 2019-06-20 22:51:23 \N -81 experimental librariesio metric implemented implemented f /api/unstable///dependents \N Dependents experimental "dependents" Insight Worker 0.0.1 librariesio 2019-06-20 22:51:25 \N -\. - - --- --- Data for Name: chaoss_user; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.chaoss_user (chaoss_id, chaoss_login_name, chaoss_login_hashword, chaoss_email, chaoss_text_phone, chaoss_first_name, chaoss_last_name, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: commit_comment_ref; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.commit_comment_ref (cmt_comment_id, cmt_id, repo_id, msg_id, user_id, body, line, "position", commit_comment_src_node_id, cmt_comment_src_id, created_at, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: commit_parents; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.commit_parents (cmt_id, parent_id, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: commits; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.commits (cmt_id, repo_id, cmt_commit_hash, cmt_author_name, cmt_author_raw_email, cmt_author_email, cmt_author_date, cmt_author_affiliation, cmt_committer_name, cmt_committer_raw_email, cmt_committer_email, cmt_committer_date, cmt_committer_affiliation, cmt_added, cmt_removed, cmt_whitespace, cmt_filename, cmt_date_attempted, cmt_ght_committer_id, cmt_ght_committed_at, cmt_committer_timestamp, cmt_author_timestamp, cmt_author_platform_username, tool_source, tool_version, data_source, data_collection_date, cmt_ght_author_id) FROM stdin; -\. - - --- --- Data for Name: contributor_affiliations; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.contributor_affiliations (ca_id, ca_domain, ca_start_date, ca_last_used, ca_affiliation, ca_active, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -1 samsung.com 1970-01-01 2018-08-01 18:37:54 Samsung 1 load 1.0 load 1970-01-01 00:00:00 -2 linuxfoundation.org 1970-01-01 2018-08-01 18:37:54 Linux Foundation 1 load 1.0 load 1970-01-01 00:00:00 -3 ibm.com 1970-01-01 2018-08-01 18:37:54 IBM 1 load 1.0 load 1970-01-01 00:00:00 -8 walmart.com 1970-01-01 2018-09-01 06:00:00 Walmart 1 load 1.0 load 1970-01-01 00:00:00 -9 exxonmobil.com 1970-01-01 2018-09-01 06:00:00 Exxon Mobil 1 load 1.0 load 1970-01-01 00:00:00 -10 ge.com 1970-01-01 2018-09-01 06:00:00 General Electric 1 load 1.0 load 1970-01-01 00:00:00 -11 dupont.com 1970-01-01 2018-09-01 06:00:00 DuPont 1 load 1.0 load 1970-01-01 00:00:00 -12 avnet.com 1970-01-01 2018-09-01 06:00:00 Avnet 1 load 1.0 load 1970-01-01 00:00:00 -13 macysinc.com 1970-01-01 2018-09-01 06:00:00 Macys 1 load 1.0 load 1970-01-01 00:00:00 -14 enterpriseproducts.com 1970-01-01 2018-09-01 06:00:00 Enterprise Products Partners 1 load 1.0 load 1970-01-01 00:00:00 -15 travelers.com 1970-01-01 2018-09-01 06:00:00 Travelers Cos. 
1 load 1.0 load 1970-01-01 00:00:00 -16 pmi.com 1970-01-01 2018-09-01 06:00:00 Philip Morris International 1 load 1.0 load 1970-01-01 00:00:00 -17 riteaid.com 1970-01-01 2018-09-01 06:00:00 Rite Aid 1 load 1.0 load 1970-01-01 00:00:00 -18 techdata.com 1970-01-01 2018-09-01 06:00:00 Tech Data 1 load 1.0 load 1970-01-01 00:00:00 -25156 pivotal.io 1970-01-01 2020-03-25 00:30:57 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:30:57 -25157 vmware.com 1970-01-01 2020-03-25 00:33:35 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:33:35 -25158 rabbitmq.com 1970-01-01 2020-03-25 00:33:43 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:33:43 -25161 pivotallabs.com 1970-01-01 2020-03-25 00:43:53 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:43:53 -25162 cloudcredo.com 1970-01-01 2020-03-25 00:44:18 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:44:18 -25163 gopivotal.com 1970-01-01 2020-03-25 00:44:25 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:44:25 -25164 heptio.com 1970-01-01 2020-03-25 00:44:32 VMware 1 Manual Entry 0.0.0 Gabe 2020-03-25 00:44:32 -19 aboutmcdonalds.com 1970-01-01 2018-09-01 06:00:00 McDonalds 1 load 1.0 load 1970-01-01 00:00:00 -20 qualcomm.com 1970-01-01 2018-09-01 06:00:00 Qualcomm 1 load 1.0 load 1970-01-01 00:00:00 -21 amerisourcebergen.com 1970-01-01 2018-09-01 06:00:00 AmerisourceBergen 1 load 1.0 load 1970-01-01 00:00:00 -22 searsholdings.com 1970-01-01 2018-09-01 06:00:00 Sears Holdings 1 load 1.0 load 1970-01-01 00:00:00 -23 capitalone.com 1970-01-01 2018-09-01 06:00:00 Capital One Financial 1 load 1.0 load 1970-01-01 00:00:00 -24 emc.com 1970-01-01 2018-09-01 06:00:00 EMC 1 load 1.0 load 1970-01-01 00:00:00 -25 usaa.com 1970-01-01 2018-09-01 06:00:00 USAA 1 load 1.0 load 1970-01-01 00:00:00 -26 duke-energy.com 1970-01-01 2018-09-01 06:00:00 Duke Energy 1 load 1.0 load 1970-01-01 00:00:00 -27 twc.com 1970-01-01 2018-09-01 06:00:00 Time Warner Cable 1 load 1.0 load 1970-01-01 00:00:00 -28 halliburton.com 1970-01-01 2018-09-01 06:00:00 Halliburton 1 load 1.0 load 1970-01-01 00:00:00 -29 northropgrumman.com 1970-01-01 2018-09-01 06:00:00 Northrop Grumman 1 load 1.0 load 1970-01-01 00:00:00 -30 arrow.com 1970-01-01 2018-09-01 06:00:00 Arrow Electronics 1 load 1.0 load 1970-01-01 00:00:00 -31 raytheon.com 1970-01-01 2018-09-01 06:00:00 Raytheon 1 load 1.0 load 1970-01-01 00:00:00 -32 verizon.com 1970-01-01 2018-09-01 06:00:00 Verizon 1 load 1.0 load 1970-01-01 00:00:00 -33 plainsallamerican.com 1970-01-01 2018-09-01 06:00:00 Plains GP Holdings 1 load 1.0 load 1970-01-01 00:00:00 -34 usfoods.com 1970-01-01 2018-09-01 06:00:00 US Foods 1 load 1.0 load 1970-01-01 00:00:00 -35 abbvie.com 1970-01-01 2018-09-01 06:00:00 AbbVie 1 load 1.0 load 1970-01-01 00:00:00 -36 centene.com 1970-01-01 2018-09-01 06:00:00 Centene 1 load 1.0 load 1970-01-01 00:00:00 -37 chs.net 1970-01-01 2018-09-01 06:00:00 Community Health Systems 1 load 1.0 load 1970-01-01 00:00:00 -38 arconic.com 1970-01-01 2018-09-01 06:00:00 Arconic 1 load 1.0 load 1970-01-01 00:00:00 -39 internationalpaper.com 1970-01-01 2018-09-01 06:00:00 International Paper 1 load 1.0 load 1970-01-01 00:00:00 -40 emerson.com 1970-01-01 2018-09-01 06:00:00 Emerson Electric 1 load 1.0 load 1970-01-01 00:00:00 -41 up.com 1970-01-01 2018-09-01 06:00:00 Union Pacific 1 load 1.0 load 1970-01-01 00:00:00 -42 amgen.com 1970-01-01 2018-09-01 06:00:00 Amgen 1 load 1.0 load 1970-01-01 00:00:00 -43 chevron.com 1970-01-01 2018-09-01 06:00:00 Chevron 1 load 1.0 load 1970-01-01 00:00:00 -44 usbank.com 1970-01-01 2018-09-01 06:00:00 U.S. 
Bancorp 1 load 1.0 load 1970-01-01 00:00:00 -45 staples.com 1970-01-01 2018-09-01 06:00:00 Staples 1 load 1.0 load 1970-01-01 00:00:00 -46 danaher.com 1970-01-01 2018-09-01 06:00:00 Danaher 1 load 1.0 load 1970-01-01 00:00:00 -47 whirlpoolcorp.com 1970-01-01 2018-09-01 06:00:00 Whirlpool 1 load 1.0 load 1970-01-01 00:00:00 -48 aflac.com 1970-01-01 2018-09-01 06:00:00 Aflac 1 load 1.0 load 1970-01-01 00:00:00 -49 autonation.com 1970-01-01 2018-09-01 06:00:00 AutoNation 1 load 1.0 load 1970-01-01 00:00:00 -50 progressive.com 1970-01-01 2018-09-01 06:00:00 Progressive 1 load 1.0 load 1970-01-01 00:00:00 -51 abbott.com 1970-01-01 2018-09-01 06:00:00 Abbott Laboratories 1 load 1.0 load 1970-01-01 00:00:00 -52 dollargeneral.com 1970-01-01 2018-09-01 06:00:00 Dollar General 1 load 1.0 load 1970-01-01 00:00:00 -53 tenethealth.com 1970-01-01 2018-09-01 06:00:00 Tenet Healthcare 1 load 1.0 load 1970-01-01 00:00:00 -54 costco.com 1970-01-01 2018-09-01 06:00:00 Costco 1 load 1.0 load 1970-01-01 00:00:00 -55 lilly.com 1970-01-01 2018-09-01 06:00:00 Eli Lilly 1 load 1.0 load 1970-01-01 00:00:00 -56 southwest.com 1970-01-01 2018-09-01 06:00:00 Southwest Airlines 1 load 1.0 load 1970-01-01 00:00:00 -57 penskeautomotive.com 1970-01-01 2018-09-01 06:00:00 Penske Automotive Group 1 load 1.0 load 1970-01-01 00:00:00 -58 manpowergroup.com 1970-01-01 2018-09-01 06:00:00 ManpowerGroup 1 load 1.0 load 1970-01-01 00:00:00 -59 kohlscorporation.com 1970-01-01 2018-09-01 06:00:00 Kohls 1 load 1.0 load 1970-01-01 00:00:00 -60 starbucks.com 1970-01-01 2018-09-01 06:00:00 Starbucks 1 load 1.0 load 1970-01-01 00:00:00 -61 paccar.com 1970-01-01 2018-09-01 06:00:00 Paccar 1 load 1.0 load 1970-01-01 00:00:00 -62 cummins.com 1970-01-01 2018-09-01 06:00:00 Cummins 1 load 1.0 load 1970-01-01 00:00:00 -63 altria.com 1970-01-01 2018-09-01 06:00:00 Altria Group 1 load 1.0 load 1970-01-01 00:00:00 -64 xerox.com 1970-01-01 2018-09-01 06:00:00 Xerox 1 load 1.0 load 1970-01-01 00:00:00 -65 fanniemae.com 1970-01-01 2018-09-01 06:00:00 Fannie Mae 1 load 1.0 load 1970-01-01 00:00:00 -66 kimberly-clark.com 1970-01-01 2018-09-01 06:00:00 Kimberly-Clark 1 load 1.0 load 1970-01-01 00:00:00 -67 thehartford.com 1970-01-01 2018-09-01 06:00:00 Hartford Financial Services Group 1 load 1.0 load 1970-01-01 00:00:00 -68 kraftheinzcompany.com 1970-01-01 2018-09-01 06:00:00 Kraft Heinz 1 load 1.0 load 1970-01-01 00:00:00 -69 lear.com 1970-01-01 2018-09-01 06:00:00 Lear 1 load 1.0 load 1970-01-01 00:00:00 -70 fluor.com 1970-01-01 2018-09-01 06:00:00 Fluor 1 load 1.0 load 1970-01-01 00:00:00 -71 aecom.com 1970-01-01 2018-09-01 06:00:00 AECOM 1 load 1.0 load 1970-01-01 00:00:00 -72 facebook.com 1970-01-01 2018-09-01 06:00:00 Facebook 1 load 1.0 load 1970-01-01 00:00:00 -73 jabil.com 1970-01-01 2018-09-01 06:00:00 Jabil Circuit 1 load 1.0 load 1970-01-01 00:00:00 -74 centurylink.com 1970-01-01 2018-09-01 06:00:00 CenturyLink 1 load 1.0 load 1970-01-01 00:00:00 -75 supervalu.com 1970-01-01 2018-09-01 06:00:00 Supervalu 1 load 1.0 load 1970-01-01 00:00:00 -76 thekrogerco.com 1970-01-01 2018-09-01 06:00:00 Kroger 1 load 1.0 load 1970-01-01 00:00:00 -77 generalmills.com 1970-01-01 2018-09-01 06:00:00 General Mills 1 load 1.0 load 1970-01-01 00:00:00 -78 southerncompany.com 1970-01-01 2018-09-01 06:00:00 Southern 1 load 1.0 load 1970-01-01 00:00:00 -79 nexteraenergy.com 1970-01-01 2018-09-01 06:00:00 NextEra Energy 1 load 1.0 load 1970-01-01 00:00:00 -80 thermofisher.com 1970-01-01 2018-09-01 06:00:00 Thermo Fisher Scientific 1 load 1.0 load 1970-01-01 
00:00:00 -81 aep.com 1970-01-01 2018-09-01 06:00:00 American Electric Power 1 load 1.0 load 1970-01-01 00:00:00 -82 pge.com 1970-01-01 2018-09-01 06:00:00 PG&E Corp. 1 load 1.0 load 1970-01-01 00:00:00 -83 nglenergypartners.com 1970-01-01 2018-09-01 06:00:00 NGL Energy Partners 1 load 1.0 load 1970-01-01 00:00:00 -84 bms.com 1970-01-01 2018-09-01 06:00:00 Bristol-Myers Squibb 1 load 1.0 load 1970-01-01 00:00:00 -85 goodyear.com 1970-01-01 2018-09-01 06:00:00 Goodyear Tire & Rubber 1 load 1.0 load 1970-01-01 00:00:00 -86 nucor.com 1970-01-01 2018-09-01 06:00:00 Nucor 1 load 1.0 load 1970-01-01 00:00:00 -87 amazon.com 1970-01-01 2018-09-01 06:00:00 Amazon.com 1 load 1.0 load 1970-01-01 00:00:00 -88 pnc.com 1970-01-01 2018-09-01 06:00:00 PNC Financial Services Group 1 load 1.0 load 1970-01-01 00:00:00 -89 healthnet.com 1970-01-01 2018-09-01 06:00:00 Health Net 1 load 1.0 load 1970-01-01 00:00:00 -90 micron.com 1970-01-01 2018-09-01 06:00:00 Micron Technology 1 load 1.0 load 1970-01-01 00:00:00 -91 colgatepalmolive.com 1970-01-01 2018-09-01 06:00:00 Colgate-Palmolive 1 load 1.0 load 1970-01-01 00:00:00 -92 fcx.com 1970-01-01 2018-09-01 06:00:00 Freeport-McMoRan 1 load 1.0 load 1970-01-01 00:00:00 -93 conagrafoods.com 1970-01-01 2018-09-01 06:00:00 ConAgra Foods 1 load 1.0 load 1970-01-01 00:00:00 -94 gapinc.com 1970-01-01 2018-09-01 06:00:00 Gap 1 load 1.0 load 1970-01-01 00:00:00 -95 bakerhughes.com 1970-01-01 2018-09-01 06:00:00 Baker Hughes 1 load 1.0 load 1970-01-01 00:00:00 -96 bnymellon.com 1970-01-01 2018-09-01 06:00:00 Bank of New York Mellon Corp. 1 load 1.0 load 1970-01-01 00:00:00 -97 dollartree.com 1970-01-01 2018-09-01 06:00:00 Dollar Tree 1 load 1.0 load 1970-01-01 00:00:00 -98 walgreensbootsalliance.com 1970-01-01 2018-09-01 06:00:00 Walgreens 1 load 1.0 load 1970-01-01 00:00:00 -99 wholefoodsmarket.com 1970-01-01 2018-09-01 06:00:00 Whole Foods Market 1 load 1.0 load 1970-01-01 00:00:00 -100 ppg.com 1970-01-01 2018-09-01 06:00:00 PPG Industries 1 load 1.0 load 1970-01-01 00:00:00 -101 genpt.com 1970-01-01 2018-09-01 06:00:00 Genuine Parts 1 load 1.0 load 1970-01-01 00:00:00 -102 ielp.com 1970-01-01 2018-09-01 06:00:00 Icahn Enterprises 1 load 1.0 load 1970-01-01 00:00:00 -103 pfgc.com 1970-01-01 2018-09-01 06:00:00 Performance Food Group 1 load 1.0 load 1970-01-01 00:00:00 -104 omnicomgroup.com 1970-01-01 2018-09-01 06:00:00 Omnicom Group 1 load 1.0 load 1970-01-01 00:00:00 -105 dish.com 1970-01-01 2018-09-01 06:00:00 DISH Network 1 load 1.0 load 1970-01-01 00:00:00 -106 firstenergycorp.com 1970-01-01 2018-09-01 06:00:00 FirstEnergy 1 load 1.0 load 1970-01-01 00:00:00 -107 monsanto.com 1970-01-01 2018-09-01 06:00:00 Monsanto 1 load 1.0 load 1970-01-01 00:00:00 -108 aes.com 1970-01-01 2018-09-01 06:00:00 AES 1 load 1.0 load 1970-01-01 00:00:00 -109 hp.com 1970-01-01 2018-09-01 06:00:00 HP 1 load 1.0 load 1970-01-01 00:00:00 -110 carmax.com 1970-01-01 2018-09-01 06:00:00 CarMax 1 load 1.0 load 1970-01-01 00:00:00 -111 nov.com 1970-01-01 2018-09-01 06:00:00 National Oilwell Varco 1 load 1.0 load 1970-01-01 00:00:00 -112 nrgenergy.com 1970-01-01 2018-09-01 06:00:00 NRG Energy 1 load 1.0 load 1970-01-01 00:00:00 -113 westerndigital.com 1970-01-01 2018-09-01 06:00:00 Western Digital 1 load 1.0 load 1970-01-01 00:00:00 -114 marriott.com 1970-01-01 2018-09-01 06:00:00 Marriott International 1 load 1.0 load 1970-01-01 00:00:00 -115 officedepot.com 1970-01-01 2018-09-01 06:00:00 Office Depot 1 load 1.0 load 1970-01-01 00:00:00 -116 nordstrom.com 1970-01-01 2018-09-01 06:00:00 Nordstrom 1 
load 1.0 load 1970-01-01 00:00:00 -117 kindermorgan.com 1970-01-01 2018-09-01 06:00:00 Kinder Morgan 1 load 1.0 load 1970-01-01 00:00:00 -118 aramark.com 1970-01-01 2018-09-01 06:00:00 Aramark 1 load 1.0 load 1970-01-01 00:00:00 -119 davita.com 1970-01-01 2018-09-01 06:00:00 DaVita 1 load 1.0 load 1970-01-01 00:00:00 -120 apple.com 1970-01-01 2018-09-01 06:00:00 Apple 1 load 1.0 load 1970-01-01 00:00:00 -121 cardinal.com 1970-01-01 2018-09-01 06:00:00 Cardinal Health 1 load 1.0 load 1970-01-01 00:00:00 -122 molinahealthcare.com 1970-01-01 2018-09-01 06:00:00 Molina Healthcare 1 load 1.0 load 1970-01-01 00:00:00 -123 wellcare.com 1970-01-01 2018-09-01 06:00:00 WellCare Health Plans 1 load 1.0 load 1970-01-01 00:00:00 -124 cbscorporation.com 1970-01-01 2018-09-01 06:00:00 CBS 1 load 1.0 load 1970-01-01 00:00:00 -125 visa.com 1970-01-01 2018-09-01 06:00:00 Visa 1 load 1.0 load 1970-01-01 00:00:00 -126 lfg.com 1970-01-01 2018-09-01 06:00:00 Lincoln National 1 load 1.0 load 1970-01-01 00:00:00 -127 ecolab.com 1970-01-01 2018-09-01 06:00:00 Ecolab 1 load 1.0 load 1970-01-01 00:00:00 -128 kelloggcompany.com 1970-01-01 2018-09-01 06:00:00 Kellogg 1 load 1.0 load 1970-01-01 00:00:00 -129 chrobinson.com 1970-01-01 2018-09-01 06:00:00 C.H. Robinson Worldwide 1 load 1.0 load 1970-01-01 00:00:00 -130 textron.com 1970-01-01 2018-09-01 06:00:00 Textron 1 load 1.0 load 1970-01-01 00:00:00 -131 loews.com 1970-01-01 2018-09-01 06:00:00 Loews 1 load 1.0 load 1970-01-01 00:00:00 -132 express-scripts.com 1970-01-01 2018-09-01 06:00:00 Express Scripts Holding 1 load 1.0 load 1970-01-01 00:00:00 -133 itw.com 1970-01-01 2018-09-01 06:00:00 Illinois Tool Works 1 load 1.0 load 1970-01-01 00:00:00 -134 synnex.com 1970-01-01 2018-09-01 06:00:00 Synnex 1 load 1.0 load 1970-01-01 00:00:00 -135 viacom.com 1970-01-01 2018-09-01 06:00:00 Viacom 1 load 1.0 load 1970-01-01 00:00:00 -136 hollyfrontier.com 1970-01-01 2018-09-01 06:00:00 HollyFrontier 1 load 1.0 load 1970-01-01 00:00:00 -137 landolakesinc.com 1970-01-01 2018-09-01 06:00:00 Land O Lakes 1 load 1.0 load 1970-01-01 00:00:00 -138 devonenergy.com 1970-01-01 2018-09-01 06:00:00 Devon Energy 1 load 1.0 load 1970-01-01 00:00:00 -139 pbfenergy.com 1970-01-01 2018-09-01 06:00:00 PBF Energy 1 load 1.0 load 1970-01-01 00:00:00 -140 yum.com 1970-01-01 2018-09-01 06:00:00 Yum Brands 1 load 1.0 load 1970-01-01 00:00:00 -141 ti.com 1970-01-01 2018-09-01 06:00:00 Texas Instruments 1 load 1.0 load 1970-01-01 00:00:00 -142 cdw.com 1970-01-01 2018-09-01 06:00:00 CDW 1 load 1.0 load 1970-01-01 00:00:00 -143 jpmorganchase.com 1970-01-01 2018-09-01 06:00:00 J.P. Morgan Chase 1 load 1.0 load 1970-01-01 00:00:00 -144 wm.com 1970-01-01 2018-09-01 06:00:00 Waste Management 1 load 1.0 load 1970-01-01 00:00:00 -145 mmc.com 1970-01-01 2018-09-01 06:00:00 Marsh & McLennan 1 load 1.0 load 1970-01-01 00:00:00 -146 chk.com 1970-01-01 2018-09-01 06:00:00 Chesapeake Energy 1 load 1.0 load 1970-01-01 00:00:00 -147 parker.com 1970-01-01 2018-09-01 06:00:00 Parker-Hannifin 1 load 1.0 load 1970-01-01 00:00:00 -148 oxy.com 1970-01-01 2018-09-01 06:00:00 Occidental Petroleum 1 load 1.0 load 1970-01-01 00:00:00 -149 guardianlife.com 1970-01-01 2018-09-01 06:00:00 Guardian Life Ins. Co. of America 1 load 1.0 load 1970-01-01 00:00:00 -150 farmers.com 1970-01-01 2018-09-01 06:00:00 Farmers Insurance Exchange 1 load 1.0 load 1970-01-01 00:00:00 -151 jcpenney.com 1970-01-01 2018-09-01 06:00:00 J.C. 
Penney 1 load 1.0 load 1970-01-01 00:00:00 -152 conedison.com 1970-01-01 2018-09-01 06:00:00 Consolidated Edison 1 load 1.0 load 1970-01-01 00:00:00 -153 cognizant.com 1970-01-01 2018-09-01 06:00:00 Cognizant Technology Solutions 1 load 1.0 load 1970-01-01 00:00:00 -154 boeing.com 1970-01-01 2018-09-01 06:00:00 Boeing 1 load 1.0 load 1970-01-01 00:00:00 -155 vfc.com 1970-01-01 2018-09-01 06:00:00 VF 1 load 1.0 load 1970-01-01 00:00:00 -156 ameriprise.com 1970-01-01 2018-09-01 06:00:00 Ameriprise Financial 1 load 1.0 load 1970-01-01 00:00:00 -157 csc.com 1970-01-01 2018-09-01 06:00:00 Computer Sciences 1 load 1.0 load 1970-01-01 00:00:00 -158 lb.com 1970-01-01 2018-09-01 06:00:00 L Brands 1 load 1.0 load 1970-01-01 00:00:00 -159 jacobs.com 1970-01-01 2018-09-01 06:00:00 Jacobs Engineering Group 1 load 1.0 load 1970-01-01 00:00:00 -160 principal.com 1970-01-01 2018-09-01 06:00:00 Principal Financial Group 1 load 1.0 load 1970-01-01 00:00:00 -161 rossstores.com 1970-01-01 2018-09-01 06:00:00 Ross Stores 1 load 1.0 load 1970-01-01 00:00:00 -162 bedbathandbeyond.com 1970-01-01 2018-09-01 06:00:00 Bed Bath & Beyond 1 load 1.0 load 1970-01-01 00:00:00 -163 csx.com 1970-01-01 2018-09-01 06:00:00 CSX 1 load 1.0 load 1970-01-01 00:00:00 -164 toysrusinc.com 1970-01-01 2018-09-01 06:00:00 Toys \\"R\\" Us 1 load 1.0 load 1970-01-01 00:00:00 -165 microsoft.com 1970-01-01 2018-09-01 06:00:00 Microsoft 1 load 1.0 load 1970-01-01 00:00:00 -166 sands.com 1970-01-01 2018-09-01 06:00:00 Las Vegas Sands 1 load 1.0 load 1970-01-01 00:00:00 -167 leucadia.com 1970-01-01 2018-09-01 06:00:00 Leucadia National 1 load 1.0 load 1970-01-01 00:00:00 -168 dom.com 1970-01-01 2018-09-01 06:00:00 Dominion Resources 1 load 1.0 load 1970-01-01 00:00:00 -169 ussteel.com 1970-01-01 2018-09-01 06:00:00 United States Steel 1 load 1.0 load 1970-01-01 00:00:00 -170 l-3com.com 1970-01-01 2018-09-01 06:00:00 L-3 Communications 1 load 1.0 load 1970-01-01 00:00:00 -171 edisoninvestor.com 1970-01-01 2018-09-01 06:00:00 Edison International 1 load 1.0 load 1970-01-01 00:00:00 -172 entergy.com 1970-01-01 2018-09-01 06:00:00 Entergy 1 load 1.0 load 1970-01-01 00:00:00 -173 adp.com 1970-01-01 2018-09-01 06:00:00 ADP 1 load 1.0 load 1970-01-01 00:00:00 -174 firstdata.com 1970-01-01 2018-09-01 06:00:00 First Data 1 load 1.0 load 1970-01-01 00:00:00 -175 blackrock.com 1970-01-01 2018-09-01 06:00:00 BlackRock 1 load 1.0 load 1970-01-01 00:00:00 -176 bankofamerica.com 1970-01-01 2018-09-01 06:00:00 Bank of America Corp. 1 load 1.0 load 1970-01-01 00:00:00 -177 westrock.com 1970-01-01 2018-09-01 06:00:00 WestRock 1 load 1.0 load 1970-01-01 00:00:00 -178 voya.com 1970-01-01 2018-09-01 06:00:00 Voya Financial 1 load 1.0 load 1970-01-01 00:00:00 -179 sherwin.com 1970-01-01 2018-09-01 06:00:00 Sherwin-Williams 1 load 1.0 load 1970-01-01 00:00:00 -180 hiltonworldwide.com 1970-01-01 2018-09-01 06:00:00 Hilton Worldwide Holdings 1 load 1.0 load 1970-01-01 00:00:00 -181 rrdonnelley.com 1970-01-01 2018-09-01 06:00:00 R.R. Donnelley & Sons 1 load 1.0 load 1970-01-01 00:00:00 -182 stanleyblackanddecker.com 1970-01-01 2018-09-01 06:00:00 Stanley Black & Decker 1 load 1.0 load 1970-01-01 00:00:00 -183 xcelenergy.com 1970-01-01 2018-09-01 06:00:00 Xcel Energy 1 load 1.0 load 1970-01-01 00:00:00 -184 corporate.murphyusa.com 1970-01-01 2018-09-01 06:00:00 Murphy USA 1 load 1.0 load 1970-01-01 00:00:00 -185 cbre.com 1970-01-01 2018-09-01 06:00:00 CBRE Group 1 load 1.0 load 1970-01-01 00:00:00 -186 drhorton.com 1970-01-01 2018-09-01 06:00:00 D.R. 
Horton 1 load 1.0 load 1970-01-01 00:00:00 -187 wellsfargo.com 1970-01-01 2018-09-01 06:00:00 Wells Fargo 1 load 1.0 load 1970-01-01 00:00:00 -188 elcompanies.com 1970-01-01 2018-09-01 06:00:00 Estee Lauder 1 load 1.0 load 1970-01-01 00:00:00 -189 praxair.com 1970-01-01 2018-09-01 06:00:00 Praxair 1 load 1.0 load 1970-01-01 00:00:00 -190 biogen.com 1970-01-01 2018-09-01 06:00:00 Biogen 1 load 1.0 load 1970-01-01 00:00:00 -191 statestreet.com 1970-01-01 2018-09-01 06:00:00 State Street Corp. 1 load 1.0 load 1970-01-01 00:00:00 -192 unum.com 1970-01-01 2018-09-01 06:00:00 Unum Group 1 load 1.0 load 1970-01-01 00:00:00 -193 reynoldsamerican.com 1970-01-01 2018-09-01 06:00:00 Reynolds American 1 load 1.0 load 1970-01-01 00:00:00 -194 group1auto.com 1970-01-01 2018-09-01 06:00:00 Group 1 Automotive 1 load 1.0 load 1970-01-01 00:00:00 -195 henryschein.com 1970-01-01 2018-09-01 06:00:00 Henry Schein 1 load 1.0 load 1970-01-01 00:00:00 -196 hertz.com 1970-01-01 2018-09-01 06:00:00 Hertz Global Holdings 1 load 1.0 load 1970-01-01 00:00:00 -197 nscorp.com 1970-01-01 2018-09-01 06:00:00 Norfolk Southern 1 load 1.0 load 1970-01-01 00:00:00 -198 homedepot.com 1970-01-01 2018-09-01 06:00:00 Home Depot 1 load 1.0 load 1970-01-01 00:00:00 -199 rgare.com 1970-01-01 2018-09-01 06:00:00 Reinsurance Group of America 1 load 1.0 load 1970-01-01 00:00:00 -200 pseg.com 1970-01-01 2018-09-01 06:00:00 Public Service Enterprise Group 1 load 1.0 load 1970-01-01 00:00:00 -201 bbt.com 1970-01-01 2018-09-01 06:00:00 BB&T Corp. 1 load 1.0 load 1970-01-01 00:00:00 -202 dteenergy.com 1970-01-01 2018-09-01 06:00:00 DTE Energy 1 load 1.0 load 1970-01-01 00:00:00 -203 assurant.com 1970-01-01 2018-09-01 06:00:00 Assurant 1 load 1.0 load 1970-01-01 00:00:00 -204 globalp.com 1970-01-01 2018-09-01 06:00:00 Global Partners 1 load 1.0 load 1970-01-01 00:00:00 -205 huntsman.com 1970-01-01 2018-09-01 06:00:00 Huntsman 1 load 1.0 load 1970-01-01 00:00:00 -206 bd.com 1970-01-01 2018-09-01 06:00:00 Becton Dickinson 1 load 1.0 load 1970-01-01 00:00:00 -207 sempra.com 1970-01-01 2018-09-01 06:00:00 Sempra Energy 1 load 1.0 load 1970-01-01 00:00:00 -208 autozone.com 1970-01-01 2018-09-01 06:00:00 AutoZone 1 load 1.0 load 1970-01-01 00:00:00 -209 citigroup.com 1970-01-01 2018-09-01 06:00:00 Citigroup 1 load 1.0 load 1970-01-01 00:00:00 -210 navistar.com 1970-01-01 2018-09-01 06:00:00 Navistar International 1 load 1.0 load 1970-01-01 00:00:00 -211 precast.com 1970-01-01 2018-09-01 06:00:00 Precision Castparts 1 load 1.0 load 1970-01-01 00:00:00 -212 discoverfinancial.com 1970-01-01 2018-09-01 06:00:00 Discover Financial Services 1 load 1.0 load 1970-01-01 00:00:00 -213 libertyinteractive.com 1970-01-01 2018-09-01 06:00:00 Liberty Interactive 1 load 1.0 load 1970-01-01 00:00:00 -214 grainger.com 1970-01-01 2018-09-01 06:00:00 W.W. 
Grainger 1 load 1.0 load 1970-01-01 00:00:00 -215 baxter.com 1970-01-01 2018-09-01 06:00:00 Baxter International 1 load 1.0 load 1970-01-01 00:00:00 -216 stryker.com 1970-01-01 2018-09-01 06:00:00 Stryker 1 load 1.0 load 1970-01-01 00:00:00 -217 airproducts.com 1970-01-01 2018-09-01 06:00:00 Air Products & Chemicals 1 load 1.0 load 1970-01-01 00:00:00 -218 wnr.com 1970-01-01 2018-09-01 06:00:00 Western Refining 1 load 1.0 load 1970-01-01 00:00:00 -219 uhsinc.com 1970-01-01 2018-09-01 06:00:00 Universal Health Services 1 load 1.0 load 1970-01-01 00:00:00 -220 phillips66.com 1970-01-01 2018-09-01 06:00:00 Phillips 66 1 load 1.0 load 1970-01-01 00:00:00 -221 owens-minor.com 1970-01-01 2018-09-01 06:00:00 Owens & Minor 1 load 1.0 load 1970-01-01 00:00:00 -222 charter.com 1970-01-01 2018-09-01 06:00:00 Charter Communications 1 load 1.0 load 1970-01-01 00:00:00 -223 advanceautoparts.com 1970-01-01 2018-09-01 06:00:00 Advance Auto Parts 1 load 1.0 load 1970-01-01 00:00:00 -224 mastercard.com 1970-01-01 2018-09-01 06:00:00 MasterCard 1 load 1.0 load 1970-01-01 00:00:00 -225 appliedmaterials.com 1970-01-01 2018-09-01 06:00:00 Applied Materials 1 load 1.0 load 1970-01-01 00:00:00 -226 eastman.com 1970-01-01 2018-09-01 06:00:00 Eastman Chemical 1 load 1.0 load 1970-01-01 00:00:00 -227 sonicautomotive.com 1970-01-01 2018-09-01 06:00:00 Sonic Automotive 1 load 1.0 load 1970-01-01 00:00:00 -228 ally.com 1970-01-01 2018-09-01 06:00:00 Ally Financial 1 load 1.0 load 1970-01-01 00:00:00 -229 cstbrands.com 1970-01-01 2018-09-01 06:00:00 CST Brands 1 load 1.0 load 1970-01-01 00:00:00 -230 ebay.com 1970-01-01 2018-09-01 06:00:00 eBay 1 load 1.0 load 1970-01-01 00:00:00 -231 berkshirehathaway.com 1970-01-01 2018-09-01 06:00:00 Berkshire Hathaway 1 load 1.0 load 1970-01-01 00:00:00 -233 lennar.com 1970-01-01 2018-09-01 06:00:00 Lennar 1 load 1.0 load 1970-01-01 00:00:00 -234 gamestopcorp.com 1970-01-01 2018-09-01 06:00:00 GameStop 1 load 1.0 load 1970-01-01 00:00:00 -235 rsac.com 1970-01-01 2018-09-01 06:00:00 Reliance Steel & Aluminum 1 load 1.0 load 1970-01-01 00:00:00 -236 hormelfoods.com 1970-01-01 2018-09-01 06:00:00 Hormel Foods 1 load 1.0 load 1970-01-01 00:00:00 -237 celgene.com 1970-01-01 2018-09-01 06:00:00 Celgene 1 load 1.0 load 1970-01-01 00:00:00 -238 genworth.com 1970-01-01 2018-09-01 06:00:00 Genworth Financial 1 load 1.0 load 1970-01-01 00:00:00 -239 paypal.com 1970-01-01 2018-09-01 06:00:00 PayPal Holdings 1 load 1.0 load 1970-01-01 00:00:00 -240 pricelinegroup.com 1970-01-01 2018-09-01 06:00:00 Priceline Group 1 load 1.0 load 1970-01-01 00:00:00 -241 mgmresorts.com 1970-01-01 2018-09-01 06:00:00 MGM Resorts International 1 load 1.0 load 1970-01-01 00:00:00 -242 autoliv.com 1970-01-01 2018-09-01 06:00:00 Autoliv 1 load 1.0 load 1970-01-01 00:00:00 -243 valero.com 1970-01-01 2018-09-01 06:00:00 Valero Energy 1 load 1.0 load 1970-01-01 00:00:00 -244 fnf.com 1970-01-01 2018-09-01 06:00:00 Fidelity National Financial 1 load 1.0 load 1970-01-01 00:00:00 -245 republicservices.com 1970-01-01 2018-09-01 06:00:00 Republic Services 1 load 1.0 load 1970-01-01 00:00:00 -246 corning.com 1970-01-01 2018-09-01 06:00:00 Corning 1 load 1.0 load 1970-01-01 00:00:00 -247 kiewit.com 1970-01-01 2018-09-01 06:00:00 Peter Kiewit Sons 1 load 1.0 load 1970-01-01 00:00:00 -248 univar.com 1970-01-01 2018-09-01 06:00:00 Univar 1 load 1.0 load 1970-01-01 00:00:00 -249 mosaicco.com 1970-01-01 2018-09-01 06:00:00 Mosaic 1 load 1.0 load 1970-01-01 00:00:00 -250 core-mark.com 1970-01-01 2018-09-01 06:00:00 Core-Mark Holding 1 
load 1.0 load 1970-01-01 00:00:00 -251 thrivent.com 1970-01-01 2018-09-01 06:00:00 Thrivent Financial for Lutherans 1 load 1.0 load 1970-01-01 00:00:00 -252 c-a-m.com 1970-01-01 2018-09-01 06:00:00 Cameron International 1 load 1.0 load 1970-01-01 00:00:00 -253 hdsupply.com 1970-01-01 2018-09-01 06:00:00 HD Supply Holdings 1 load 1.0 load 1970-01-01 00:00:00 -254 antheminc.com 1970-01-01 2018-09-01 06:00:00 Anthem 1 load 1.0 load 1970-01-01 00:00:00 -255 crowncork.com 1970-01-01 2018-09-01 06:00:00 Crown Holdings 1 load 1.0 load 1970-01-01 00:00:00 -256 eogresources.com 1970-01-01 2018-09-01 06:00:00 EOG Resources 1 load 1.0 load 1970-01-01 00:00:00 -257 veritivcorp.com 1970-01-01 2018-09-01 06:00:00 Veritiv 1 load 1.0 load 1970-01-01 00:00:00 -258 anadarko.com 1970-01-01 2018-09-01 06:00:00 Anadarko Petroleum 1 load 1.0 load 1970-01-01 00:00:00 -259 labcorp.com 1970-01-01 2018-09-01 06:00:00 Laboratory Corp. of America 1 load 1.0 load 1970-01-01 00:00:00 -260 pacificlife.com 1970-01-01 2018-09-01 06:00:00 Pacific Life 1 load 1.0 load 1970-01-01 00:00:00 -261 newscorp.com 1970-01-01 2018-09-01 06:00:00 News Corp. 1 load 1.0 load 1970-01-01 00:00:00 -262 jarden.com 1970-01-01 2018-09-01 06:00:00 Jarden 1 load 1.0 load 1970-01-01 00:00:00 -263 suntrust.com 1970-01-01 2018-09-01 06:00:00 SunTrust Banks 1 load 1.0 load 1970-01-01 00:00:00 -264 avisbudgetgroup.com 1970-01-01 2018-09-01 06:00:00 Avis Budget Group 1 load 1.0 load 1970-01-01 00:00:00 -265 pg.com 1970-01-01 2018-09-01 06:00:00 Procter & Gamble 1 load 1.0 load 1970-01-01 00:00:00 -266 broadcom.com 1970-01-01 2018-09-01 06:00:00 Broadcom 1 load 1.0 load 1970-01-01 00:00:00 -267 amfam.com 1970-01-01 2018-09-01 06:00:00 American Family Insurance Group 1 load 1.0 load 1970-01-01 00:00:00 -268 level3.com 1970-01-01 2018-09-01 06:00:00 Level 3 Communications 1 load 1.0 load 1970-01-01 00:00:00 -269 tenneco.com 1970-01-01 2018-09-01 06:00:00 Tenneco 1 load 1.0 load 1970-01-01 00:00:00 -270 unfi.com 1970-01-01 2018-09-01 06:00:00 United Natural Foods 1 load 1.0 load 1970-01-01 00:00:00 -271 deanfoods.com 1970-01-01 2018-09-01 06:00:00 Dean Foods 1 load 1.0 load 1970-01-01 00:00:00 -272 campbellsoupcompany.com 1970-01-01 2018-09-01 06:00:00 Campbell Soup 1 load 1.0 load 1970-01-01 00:00:00 -273 mohawkind.com 1970-01-01 2018-09-01 06:00:00 Mohawk Industries 1 load 1.0 load 1970-01-01 00:00:00 -274 borgwarner.com 1970-01-01 2018-09-01 06:00:00 BorgWarner 1 load 1.0 load 1970-01-01 00:00:00 -275 pvh.com 1970-01-01 2018-09-01 06:00:00 PVH 1 load 1.0 load 1970-01-01 00:00:00 -276 statefarm.com 1970-01-01 2018-09-01 06:00:00 State Farm Insurance Cos. 
1 load 1.0 load 1970-01-01 00:00:00 -277 ball.com 1970-01-01 2018-09-01 06:00:00 Ball 1 load 1.0 load 1970-01-01 00:00:00 -278 oreillyauto.com 1970-01-01 2018-09-01 06:00:00 O Reilly Automotive 1 load 1.0 load 1970-01-01 00:00:00 -279 eversource.com 1970-01-01 2018-09-01 06:00:00 Eversource Energy 1 load 1.0 load 1970-01-01 00:00:00 -280 franklinresources.com 1970-01-01 2018-09-01 06:00:00 Franklin Resources 1 load 1.0 load 1970-01-01 00:00:00 -281 masco.com 1970-01-01 2018-09-01 06:00:00 Masco 1 load 1.0 load 1970-01-01 00:00:00 -282 lithia.com 1970-01-01 2018-09-01 06:00:00 Lithia Motors 1 load 1.0 load 1970-01-01 00:00:00 -283 kkr.com 1970-01-01 2018-09-01 06:00:00 KKR 1 load 1.0 load 1970-01-01 00:00:00 -284 oneok.com 1970-01-01 2018-09-01 06:00:00 Oneok 1 load 1.0 load 1970-01-01 00:00:00 -285 newmont.com 1970-01-01 2018-09-01 06:00:00 Newmont Mining 1 load 1.0 load 1970-01-01 00:00:00 -286 pplweb.com 1970-01-01 2018-09-01 06:00:00 PPL 1 load 1.0 load 1970-01-01 00:00:00 -287 google.com 1970-01-01 2018-09-01 06:00:00 Alphabet 1 load 1.0 load 1970-01-01 00:00:00 -288 spartannash.com 1970-01-01 2018-09-01 06:00:00 SpartanNash 1 load 1.0 load 1970-01-01 00:00:00 -289 quantaservices.com 1970-01-01 2018-09-01 06:00:00 Quanta Services 1 load 1.0 load 1970-01-01 00:00:00 -290 xpo.com 1970-01-01 2018-09-01 06:00:00 XPO Logistics 1 load 1.0 load 1970-01-01 00:00:00 -291 ralphlauren.com 1970-01-01 2018-09-01 06:00:00 Ralph Lauren 1 load 1.0 load 1970-01-01 00:00:00 -292 interpublic.com 1970-01-01 2018-09-01 06:00:00 Interpublic Group 1 load 1.0 load 1970-01-01 00:00:00 -293 steeldynamics.com 1970-01-01 2018-09-01 06:00:00 Steel Dynamics 1 load 1.0 load 1970-01-01 00:00:00 -294 wesco.com 1970-01-01 2018-09-01 06:00:00 WESCO International 1 load 1.0 load 1970-01-01 00:00:00 -295 questdiagnostics.com 1970-01-01 2018-09-01 06:00:00 Quest Diagnostics 1 load 1.0 load 1970-01-01 00:00:00 -296 bostonscientific.com 1970-01-01 2018-09-01 06:00:00 Boston Scientific 1 load 1.0 load 1970-01-01 00:00:00 -297 agcocorp.com 1970-01-01 2018-09-01 06:00:00 AGCO 1 load 1.0 load 1970-01-01 00:00:00 -298 comcastcorporation.com 1970-01-01 2018-09-01 06:00:00 Comcast 1 load 1.0 load 1970-01-01 00:00:00 -299 footlocker-inc.com 1970-01-01 2018-09-01 06:00:00 Foot Locker 1 load 1.0 load 1970-01-01 00:00:00 -300 thehersheycompany.com 1970-01-01 2018-09-01 06:00:00 Hershey 1 load 1.0 load 1970-01-01 00:00:00 -450 nvrinc.com 1970-01-01 2018-09-01 06:00:00 NVR 1 load 1.0 load 1970-01-01 00:00:00 -301 centerpointenergy.com 1970-01-01 2018-09-01 06:00:00 CenterPoint Energy 1 load 1.0 load 1970-01-01 00:00:00 -302 williams.com 1970-01-01 2018-09-01 06:00:00 Williams 1 load 1.0 load 1970-01-01 00:00:00 -303 dickssportinggoods.com 1970-01-01 2018-09-01 06:00:00 Dicks Sporting Goods 1 load 1.0 load 1970-01-01 00:00:00 -304 livenation.com 1970-01-01 2018-09-01 06:00:00 Live Nation Entertainment 1 load 1.0 load 1970-01-01 00:00:00 -305 mutualofomaha.com 1970-01-01 2018-09-01 06:00:00 Mutual of Omaha Insurance 1 load 1.0 load 1970-01-01 00:00:00 -306 wrberkley.com 1970-01-01 2018-09-01 06:00:00 W.R. 
Berkley 1 load 1.0 load 1970-01-01 00:00:00 -307 lkqcorp.com 1970-01-01 2018-09-01 06:00:00 LKQ 1 load 1.0 load 1970-01-01 00:00:00 -308 avoncompany.com 1970-01-01 2018-09-01 06:00:00 Avon Products 1 load 1.0 load 1970-01-01 00:00:00 -309 target.com 1970-01-01 2018-09-01 06:00:00 Target 1 load 1.0 load 1970-01-01 00:00:00 -310 darden.com 1970-01-01 2018-09-01 06:00:00 Darden Restaurants 1 load 1.0 load 1970-01-01 00:00:00 -311 kindredhealthcare.com 1970-01-01 2018-09-01 06:00:00 Kindred Healthcare 1 load 1.0 load 1970-01-01 00:00:00 -312 weyerhaeuser.com 1970-01-01 2018-09-01 06:00:00 Weyerhaeuser 1 load 1.0 load 1970-01-01 00:00:00 -313 caseys.com 1970-01-01 2018-09-01 06:00:00 Caseys General Stores 1 load 1.0 load 1970-01-01 00:00:00 -314 sealedair.com 1970-01-01 2018-09-01 06:00:00 Sealed Air 1 load 1.0 load 1970-01-01 00:00:00 -315 53.com 1970-01-01 2018-09-01 06:00:00 Fifth Third Bancorp 1 load 1.0 load 1970-01-01 00:00:00 -316 dovercorporation.com 1970-01-01 2018-09-01 06:00:00 Dover 1 load 1.0 load 1970-01-01 00:00:00 -317 huntingtoningalls.com 1970-01-01 2018-09-01 06:00:00 Huntington Ingalls Industries 1 load 1.0 load 1970-01-01 00:00:00 -318 netflix.com 1970-01-01 2018-09-01 06:00:00 Netflix 1 load 1.0 load 1970-01-01 00:00:00 -319 dillards.com 1970-01-01 2018-09-01 06:00:00 Dillards 1 load 1.0 load 1970-01-01 00:00:00 -320 jnj.com 1970-01-01 2018-09-01 06:00:00 Johnson & Johnson 1 load 1.0 load 1970-01-01 00:00:00 -321 emcorgroup.com 1970-01-01 2018-09-01 06:00:00 EMCOR Group 1 load 1.0 load 1970-01-01 00:00:00 -322 edwardjones.com 1970-01-01 2018-09-01 06:00:00 Jones Financial 1 load 1.0 load 1970-01-01 00:00:00 -323 aksteel.com 1970-01-01 2018-09-01 06:00:00 AK Steel Holding 1 load 1.0 load 1970-01-01 00:00:00 -324 ugicorp.com 1970-01-01 2018-09-01 06:00:00 UGI 1 load 1.0 load 1970-01-01 00:00:00 -325 expediainc.com 1970-01-01 2018-09-01 06:00:00 Expedia 1 load 1.0 load 1970-01-01 00:00:00 -326 salesforce.com 1970-01-01 2018-09-01 06:00:00 salesforce.com 1 load 1.0 load 1970-01-01 00:00:00 -327 targaresources.com 1970-01-01 2018-09-01 06:00:00 Targa Resources 1 load 1.0 load 1970-01-01 00:00:00 -328 apachecorp.com 1970-01-01 2018-09-01 06:00:00 Apache 1 load 1.0 load 1970-01-01 00:00:00 -329 spiritaero.com 1970-01-01 2018-09-01 06:00:00 Spirit AeroSystems Holdings 1 load 1.0 load 1970-01-01 00:00:00 -330 expeditors.com 1970-01-01 2018-09-01 06:00:00 Expeditors International of Washington 1 load 1.0 load 1970-01-01 00:00:00 -331 metlife.com 1970-01-01 2018-09-01 06:00:00 MetLife 1 load 1.0 load 1970-01-01 00:00:00 -332 anixter.com 1970-01-01 2018-09-01 06:00:00 Anixter International 1 load 1.0 load 1970-01-01 00:00:00 -333 fisglobal.com 1970-01-01 2018-09-01 06:00:00 Fidelity National Information Services 1 load 1.0 load 1970-01-01 00:00:00 -334 asburyauto.com 1970-01-01 2018-09-01 06:00:00 Asbury Automotive Group 1 load 1.0 load 1970-01-01 00:00:00 -335 hess.com 1970-01-01 2018-09-01 06:00:00 Hess 1 load 1.0 load 1970-01-01 00:00:00 -336 ryder.com 1970-01-01 2018-09-01 06:00:00 Ryder System 1 load 1.0 load 1970-01-01 00:00:00 -337 terex.com 1970-01-01 2018-09-01 06:00:00 Terex 1 load 1.0 load 1970-01-01 00:00:00 -338 cokecce.com 1970-01-01 2018-09-01 06:00:00 Coca-Cola European Partners 1 load 1.0 load 1970-01-01 00:00:00 -339 auto-owners.com 1970-01-01 2018-09-01 06:00:00 Auto-Owners Insurance 1 load 1.0 load 1970-01-01 00:00:00 -340 cablevision.com 1970-01-01 2018-09-01 06:00:00 Cablevision Systems 1 load 1.0 load 1970-01-01 00:00:00 -341 symantec.com 1970-01-01 2018-09-01 
06:00:00 Symantec 1 load 1.0 load 1970-01-01 00:00:00 -342 mckesson.com 1970-01-01 2018-09-01 06:00:00 McKesson 1 load 1.0 load 1970-01-01 00:00:00 -343 adm.com 1970-01-01 2018-09-01 06:00:00 Archer Daniels Midland 1 load 1.0 load 1970-01-01 00:00:00 -344 aboutschwab.com 1970-01-01 2018-09-01 06:00:00 Charles Schwab 1 load 1.0 load 1970-01-01 00:00:00 -345 calpine.com 1970-01-01 2018-09-01 06:00:00 Calpine 1 load 1.0 load 1970-01-01 00:00:00 -346 cmsenergy.com 1970-01-01 2018-09-01 06:00:00 CMS Energy 1 load 1.0 load 1970-01-01 00:00:00 -347 alliancedata.com 1970-01-01 2018-09-01 06:00:00 Alliance Data Systems 1 load 1.0 load 1970-01-01 00:00:00 -348 jetblue.com 1970-01-01 2018-09-01 06:00:00 JetBlue Airways 1 load 1.0 load 1970-01-01 00:00:00 -349 discoverycommunications.com 1970-01-01 2018-09-01 06:00:00 Discovery Communications 1 load 1.0 load 1970-01-01 00:00:00 -350 trin.net 1970-01-01 2018-09-01 06:00:00 Trinity Industries 1 load 1.0 load 1970-01-01 00:00:00 -351 sanmina.com 1970-01-01 2018-09-01 06:00:00 Sanmina 1 load 1.0 load 1970-01-01 00:00:00 -352 ncr.com 1970-01-01 2018-09-01 06:00:00 NCR 1 load 1.0 load 1970-01-01 00:00:00 -353 fmctechnologies.com 1970-01-01 2018-09-01 06:00:00 FMC Technologies 1 load 1.0 load 1970-01-01 00:00:00 -354 marathonpetroleum.com 1970-01-01 2018-09-01 06:00:00 Marathon Petroleum 1 load 1.0 load 1970-01-01 00:00:00 -355 erieinsurance.com 1970-01-01 2018-09-01 06:00:00 Erie Insurance Group 1 load 1.0 load 1970-01-01 00:00:00 -356 rockwellautomation.com 1970-01-01 2018-09-01 06:00:00 Rockwell Automation 1 load 1.0 load 1970-01-01 00:00:00 -357 drpeppersnapplegroup.com 1970-01-01 2018-09-01 06:00:00 Dr Pepper Snapple Group 1 load 1.0 load 1970-01-01 00:00:00 -358 iheartmedia.com 1970-01-01 2018-09-01 06:00:00 iHeartMedia 1 load 1.0 load 1970-01-01 00:00:00 -359 tractorsupply.com 1970-01-01 2018-09-01 06:00:00 Tractor Supply 1 load 1.0 load 1970-01-01 00:00:00 -360 jbhunt.com 1970-01-01 2018-09-01 06:00:00 J.B. 
Hunt Transport Services 1 load 1.0 load 1970-01-01 00:00:00 -361 cmc.com 1970-01-01 2018-09-01 06:00:00 Commercial Metals 1 load 1.0 load 1970-01-01 00:00:00 -362 o-i.com 1970-01-01 2018-09-01 06:00:00 Owens-Illinois 1 load 1.0 load 1970-01-01 00:00:00 -363 harman.com 1970-01-01 2018-09-01 06:00:00 Harman International Industries 1 load 1.0 load 1970-01-01 00:00:00 -364 baxalta.com 1970-01-01 2018-09-01 06:00:00 Baxalta 1 load 1.0 load 1970-01-01 00:00:00 -365 freddiemac.com 1970-01-01 2018-09-01 06:00:00 Freddie Mac 1 load 1.0 load 1970-01-01 00:00:00 -366 afginc.com 1970-01-01 2018-09-01 06:00:00 American Financial Group 1 load 1.0 load 1970-01-01 00:00:00 -367 netapp.com 1970-01-01 2018-09-01 06:00:00 NetApp 1 load 1.0 load 1970-01-01 00:00:00 -368 graybar.com 1970-01-01 2018-09-01 06:00:00 Graybar Electric 1 load 1.0 load 1970-01-01 00:00:00 -369 oshkoshcorporation.com 1970-01-01 2018-09-01 06:00:00 Oshkosh 1 load 1.0 load 1970-01-01 00:00:00 -370 ameren.com 1970-01-01 2018-09-01 06:00:00 Ameren 1 load 1.0 load 1970-01-01 00:00:00 -371 amark.com 1970-01-01 2018-09-01 06:00:00 A-Mark Precious Metals 1 load 1.0 load 1970-01-01 00:00:00 -372 barnesandnobleinc.com 1970-01-01 2018-09-01 06:00:00 Barnes & Noble 1 load 1.0 load 1970-01-01 00:00:00 -373 dana.com 1970-01-01 2018-09-01 06:00:00 Dana Holding 1 load 1.0 load 1970-01-01 00:00:00 -374 cbrands.com 1970-01-01 2018-09-01 06:00:00 Constellation Brands 1 load 1.0 load 1970-01-01 00:00:00 -375 lifepointhealth.net 1970-01-01 2018-09-01 06:00:00 LifePoint Health 1 load 1.0 load 1970-01-01 00:00:00 -376 pepsico.com 1970-01-01 2018-09-01 06:00:00 PepsiCo 1 load 1.0 load 1970-01-01 00:00:00 -377 zimmerbiomet.com 1970-01-01 2018-09-01 06:00:00 Zimmer Biomet Holdings 1 load 1.0 load 1970-01-01 00:00:00 -378 harley-davidson.com 1970-01-01 2018-09-01 06:00:00 Harley-Davidson 1 load 1.0 load 1970-01-01 00:00:00 -379 pultegroupinc.com 1970-01-01 2018-09-01 06:00:00 PulteGroup 1 load 1.0 load 1970-01-01 00:00:00 -380 newellbrands.com 1970-01-01 2018-09-01 06:00:00 Newell Brands 1 load 1.0 load 1970-01-01 00:00:00 -381 averydennison.com 1970-01-01 2018-09-01 06:00:00 Avery Dennison 1 load 1.0 load 1970-01-01 00:00:00 -382 jll.com 1970-01-01 2018-09-01 06:00:00 Jones Lang LaSalle 1 load 1.0 load 1970-01-01 00:00:00 -383 wecenergygroup.com 1970-01-01 2018-09-01 06:00:00 WEC Energy Group 1 load 1.0 load 1970-01-01 00:00:00 -384 marathonoil.com 1970-01-01 2018-09-01 06:00:00 Marathon Oil 1 load 1.0 load 1970-01-01 00:00:00 -385 ta-petrol.com 1970-01-01 2018-09-01 06:00:00 TravelCenters of America 1 load 1.0 load 1970-01-01 00:00:00 -386 unitedrentals.com 1970-01-01 2018-09-01 06:00:00 United Rentals 1 load 1.0 load 1970-01-01 00:00:00 -387 utc.com 1970-01-01 2018-09-01 06:00:00 United Technologies 1 load 1.0 load 1970-01-01 00:00:00 -388 hrggroup.com 1970-01-01 2018-09-01 06:00:00 HRG Group 1 load 1.0 load 1970-01-01 00:00:00 -389 oldrepublic.com 1970-01-01 2018-09-01 06:00:00 Old Republic International 1 load 1.0 load 1970-01-01 00:00:00 -390 windstream.com 1970-01-01 2018-09-01 06:00:00 Windstream Holdings 1 load 1.0 load 1970-01-01 00:00:00 -391 starwoodhotels.com 1970-01-01 2018-09-01 06:00:00 Starwood Hotels & Resorts 1 load 1.0 load 1970-01-01 00:00:00 -392 delekus.com 1970-01-01 2018-09-01 06:00:00 Delek US Holdings 1 load 1.0 load 1970-01-01 00:00:00 -393 packagingcorp.com 1970-01-01 2018-09-01 06:00:00 Packaging Corp. 
of America 1 load 1.0 load 1970-01-01 00:00:00 -394 quintiles.com 1970-01-01 2018-09-01 06:00:00 Quintiles IMS Holdings 1 load 1.0 load 1970-01-01 00:00:00 -395 hanes.com 1970-01-01 2018-09-01 06:00:00 Hanesbrands 1 load 1.0 load 1970-01-01 00:00:00 -396 realogy.com 1970-01-01 2018-09-01 06:00:00 Realogy Holdings 1 load 1.0 load 1970-01-01 00:00:00 -397 mattel.com 1970-01-01 2018-09-01 06:00:00 Mattel 1 load 1.0 load 1970-01-01 00:00:00 -398 aetna.com 1970-01-01 2018-09-01 06:00:00 Aetna 1 load 1.0 load 1970-01-01 00:00:00 -399 motorolasolutions.com 1970-01-01 2018-09-01 06:00:00 Motorola Solutions 1 load 1.0 load 1970-01-01 00:00:00 -400 jmsmucker.com 1970-01-01 2018-09-01 06:00:00 J.M. Smucker 1 load 1.0 load 1970-01-01 00:00:00 -401 regions.com 1970-01-01 2018-09-01 06:00:00 Regions Financial 1 load 1.0 load 1970-01-01 00:00:00 -402 celanese.com 1970-01-01 2018-09-01 06:00:00 Celanese 1 load 1.0 load 1970-01-01 00:00:00 -403 thecloroxcompany.com 1970-01-01 2018-09-01 06:00:00 Clorox 1 load 1.0 load 1970-01-01 00:00:00 -404 ingredion.com 1970-01-01 2018-09-01 06:00:00 Ingredion 1 load 1.0 load 1970-01-01 00:00:00 -405 genesishcc.com 1970-01-01 2018-09-01 06:00:00 Genesis Healthcare 1 load 1.0 load 1970-01-01 00:00:00 -406 peabodyenergy.com 1970-01-01 2018-09-01 06:00:00 Peabody Energy 1 load 1.0 load 1970-01-01 00:00:00 -407 alaskaair.com 1970-01-01 2018-09-01 06:00:00 Alaska Air Group 1 load 1.0 load 1970-01-01 00:00:00 -408 seaboardcorp.com 1970-01-01 2018-09-01 06:00:00 Seaboard 1 load 1.0 load 1970-01-01 00:00:00 -409 lowes.com 1970-01-01 2018-09-01 06:00:00 Lowes 1 load 1.0 load 1970-01-01 00:00:00 -410 frontier.com 1970-01-01 2018-09-01 06:00:00 Frontier Communications 1 load 1.0 load 1970-01-01 00:00:00 -411 amphenol.com 1970-01-01 2018-09-01 06:00:00 Amphenol 1 load 1.0 load 1970-01-01 00:00:00 -412 lansingtradegroup.com 1970-01-01 2018-09-01 06:00:00 Lansing Trade Group 1 load 1.0 load 1970-01-01 00:00:00 -413 sandisk.com 1970-01-01 2018-09-01 06:00:00 SanDisk 1 load 1.0 load 1970-01-01 00:00:00 -414 sjm.com 1970-01-01 2018-09-01 06:00:00 St. Jude Medical 1 load 1.0 load 1970-01-01 00:00:00 -415 wyndhamworldwide.com 1970-01-01 2018-09-01 06:00:00 Wyndham Worldwide 1 load 1.0 load 1970-01-01 00:00:00 -416 kellyservices.com 1970-01-01 2018-09-01 06:00:00 Kelly Services 1 load 1.0 load 1970-01-01 00:00:00 -417 westernunion.com 1970-01-01 2018-09-01 06:00:00 Western Union 1 load 1.0 load 1970-01-01 00:00:00 -418 evhc.net 1970-01-01 2018-09-01 06:00:00 Envision Healthcare Holdings 1 load 1.0 load 1970-01-01 00:00:00 -419 visteon.com 1970-01-01 2018-09-01 06:00:00 Visteon 1 load 1.0 load 1970-01-01 00:00:00 -420 ups.com 1970-01-01 2018-09-01 06:00:00 UPS 1 load 1.0 load 1970-01-01 00:00:00 -421 ajg.com 1970-01-01 2018-09-01 06:00:00 Arthur J. 
Gallagher 1 load 1.0 load 1970-01-01 00:00:00 -422 hosthotels.com 1970-01-01 2018-09-01 06:00:00 Host Hotels & Resorts 1 load 1.0 load 1970-01-01 00:00:00 -423 ashland.com 1970-01-01 2018-09-01 06:00:00 Ashland 1 load 1.0 load 1970-01-01 00:00:00 -424 insight.com 1970-01-01 2018-09-01 06:00:00 Insight Enterprises 1 load 1.0 load 1970-01-01 00:00:00 -425 energyfutureholdings.com 1970-01-01 2018-09-01 06:00:00 Energy Future Holdings 1 load 1.0 load 1970-01-01 00:00:00 -426 markelcorp.com 1970-01-01 2018-09-01 06:00:00 Markel 1 load 1.0 load 1970-01-01 00:00:00 -427 essendant.com 1970-01-01 2018-09-01 06:00:00 Essendant 1 load 1.0 load 1970-01-01 00:00:00 -428 ch2m.com 1970-01-01 2018-09-01 06:00:00 CH2M Hill 1 load 1.0 load 1970-01-01 00:00:00 -429 westernsouthern.com 1970-01-01 2018-09-01 06:00:00 Western & Southern Financial Group 1 load 1.0 load 1970-01-01 00:00:00 -430 owenscorning.com 1970-01-01 2018-09-01 06:00:00 Owens Corning 1 load 1.0 load 1970-01-01 00:00:00 -431 aig.com 1970-01-01 2018-09-01 06:00:00 AIG 1 load 1.0 load 1970-01-01 00:00:00 -432 spglobal.com 1970-01-01 2018-09-01 06:00:00 S&P Global 1 load 1.0 load 1970-01-01 00:00:00 -433 raymondjames.com 1970-01-01 2018-09-01 06:00:00 Raymond James Financial 1 load 1.0 load 1970-01-01 00:00:00 -434 nisource.com 1970-01-01 2018-09-01 06:00:00 NiSource 1 load 1.0 load 1970-01-01 00:00:00 -435 airgas.com 1970-01-01 2018-09-01 06:00:00 Airgas 1 load 1.0 load 1970-01-01 00:00:00 -436 abm.com 1970-01-01 2018-09-01 06:00:00 ABM Industries 1 load 1.0 load 1970-01-01 00:00:00 -437 citizensbank.com 1970-01-01 2018-09-01 06:00:00 Citizens Financial Group 1 load 1.0 load 1970-01-01 00:00:00 -438 boozallen.com 1970-01-01 2018-09-01 06:00:00 Booz Allen Hamilton Holding 1 load 1.0 load 1970-01-01 00:00:00 -439 simon.com 1970-01-01 2018-09-01 06:00:00 Simon Property Group 1 load 1.0 load 1970-01-01 00:00:00 -440 domtar.com 1970-01-01 2018-09-01 06:00:00 Domtar 1 load 1.0 load 1970-01-01 00:00:00 -441 rockwellcollins.com 1970-01-01 2018-09-01 06:00:00 Rockwell Collins 1 load 1.0 load 1970-01-01 00:00:00 -442 prudential.com 1970-01-01 2018-09-01 06:00:00 Prudential Financial 1 load 1.0 load 1970-01-01 00:00:00 -443 lamresearch.com 1970-01-01 2018-09-01 06:00:00 Lam Research 1 load 1.0 load 1970-01-01 00:00:00 -444 fiserv.com 1970-01-01 2018-09-01 06:00:00 Fiserv 1 load 1.0 load 1970-01-01 00:00:00 -445 spectraenergy.com 1970-01-01 2018-09-01 06:00:00 Spectra Energy 1 load 1.0 load 1970-01-01 00:00:00 -446 navient.com 1970-01-01 2018-09-01 06:00:00 Navient 1 load 1.0 load 1970-01-01 00:00:00 -447 biglots.com 1970-01-01 2018-09-01 06:00:00 Big Lots 1 load 1.0 load 1970-01-01 00:00:00 -448 tdsinc.com 1970-01-01 2018-09-01 06:00:00 Telephone & Data Systems 1 load 1.0 load 1970-01-01 00:00:00 -449 firstam.com 1970-01-01 2018-09-01 06:00:00 First American Financial 1 load 1.0 load 1970-01-01 00:00:00 -451 cinfin.com 1970-01-01 2018-09-01 06:00:00 Cincinnati Financial 1 load 1.0 load 1970-01-01 00:00:00 -452 burlingtonstores.com 1970-01-01 2018-09-01 06:00:00 Burlington Stores 1 load 1.0 load 1970-01-01 00:00:00 -453 unitedhealthgroup.com 1970-01-01 2018-09-01 06:00:00 UnitedHealth Group 1 load 1.0 load 1970-01-01 00:00:00 -454 intel.com 1970-01-01 2018-09-01 06:00:00 Intel 1 load 1.0 load 1970-01-01 00:00:00 -455 humana.com 1970-01-01 2018-09-01 06:00:00 Humana 1 load 1.0 load 1970-01-01 00:00:00 -456 disney.com 1970-01-01 2018-09-01 06:00:00 Disney 1 load 1.0 load 1970-01-01 00:00:00 -457 cisco.com 1970-01-01 2018-09-01 06:00:00 Cisco Systems 1 load 
1.0 load 1970-01-01 00:00:00 -458 pfizer.com 1970-01-01 2018-09-01 06:00:00 Pfizer 1 load 1.0 load 1970-01-01 00:00:00 -459 dow.com 1970-01-01 2018-09-01 06:00:00 Dow Chemical 1 load 1.0 load 1970-01-01 00:00:00 -460 sysco.com 1970-01-01 2018-09-01 06:00:00 Sysco 1 load 1.0 load 1970-01-01 00:00:00 -461 fedex.com 1970-01-01 2018-09-01 06:00:00 FedEx 1 load 1.0 load 1970-01-01 00:00:00 -462 caterpillar.com 1970-01-01 2018-09-01 06:00:00 Caterpillar 1 load 1.0 load 1970-01-01 00:00:00 -463 lockheedmartin.com 1970-01-01 2018-09-01 06:00:00 Lockheed Martin 1 load 1.0 load 1970-01-01 00:00:00 -464 cvshealth.com 1970-01-01 2018-09-01 06:00:00 CVS Health 1 load 1.0 load 1970-01-01 00:00:00 -465 newyorklife.com 1970-01-01 2018-09-01 06:00:00 New York Life Insurance 1 load 1.0 load 1970-01-01 00:00:00 -466 coca-colacompany.com 1970-01-01 2018-09-01 06:00:00 Coca-Cola 1 load 1.0 load 1970-01-01 00:00:00 -467 hcahealthcare.com 1970-01-01 2018-09-01 06:00:00 HCA Holdings 1 load 1.0 load 1970-01-01 00:00:00 -468 ingrammicro.com 1970-01-01 2018-09-01 06:00:00 Ingram Micro 1 load 1.0 load 1970-01-01 00:00:00 -469 energytransfer.com 1970-01-01 2018-09-01 06:00:00 Energy Transfer Equity 1 load 1.0 load 1970-01-01 00:00:00 -470 tysonfoods.com 1970-01-01 2018-09-01 06:00:00 Tyson Foods 1 load 1.0 load 1970-01-01 00:00:00 -471 aa.com 1970-01-01 2018-09-01 06:00:00 American Airlines Group 1 load 1.0 load 1970-01-01 00:00:00 -472 delta.com 1970-01-01 2018-09-01 06:00:00 Delta Air Lines 1 load 1.0 load 1970-01-01 00:00:00 -473 nationwide.com 1970-01-01 2018-09-01 06:00:00 Nationwide 1 load 1.0 load 1970-01-01 00:00:00 -474 johnsoncontrols.com 1970-01-01 2018-09-01 06:00:00 Johnson Controls 1 load 1.0 load 1970-01-01 00:00:00 -475 gm.com 1970-01-01 2018-09-01 06:00:00 General Motors 1 load 1.0 load 1970-01-01 00:00:00 -476 bestbuy.com 1970-01-01 2018-09-01 06:00:00 Best Buy 1 load 1.0 load 1970-01-01 00:00:00 -477 merck.com 1970-01-01 2018-09-01 06:00:00 Merck 1 load 1.0 load 1970-01-01 00:00:00 -478 libertymutual.com 1970-01-01 2018-09-01 06:00:00 Liberty Mutual Insurance Group 1 load 1.0 load 1970-01-01 00:00:00 -479 gs.com 1970-01-01 2018-09-01 06:00:00 Goldman Sachs Group 1 load 1.0 load 1970-01-01 00:00:00 -480 honeywell.com 1970-01-01 2018-09-01 06:00:00 Honeywell International 1 load 1.0 load 1970-01-01 00:00:00 -481 massmutual.com 1970-01-01 2018-09-01 06:00:00 Massachusetts Mutual Life Insurance 1 load 1.0 load 1970-01-01 00:00:00 -482 oracle.com 1970-01-01 2018-09-01 06:00:00 Oracle 1 load 1.0 load 1970-01-01 00:00:00 -483 morganstanley.com 1970-01-01 2018-09-01 06:00:00 Morgan Stanley 1 load 1.0 load 1970-01-01 00:00:00 -484 cigna.com 1970-01-01 2018-09-01 06:00:00 Cigna 1 load 1.0 load 1970-01-01 00:00:00 -485 unitedcontinentalholdings.com 1970-01-01 2018-09-01 06:00:00 United Continental Holdings 1 load 1.0 load 1970-01-01 00:00:00 -486 ford.com 1970-01-01 2018-09-01 06:00:00 Ford Motor 1 load 1.0 load 1970-01-01 00:00:00 -487 allstate.com 1970-01-01 2018-09-01 06:00:00 Allstate 1 load 1.0 load 1970-01-01 00:00:00 -488 tiaa.org 1970-01-01 2018-09-01 06:00:00 TIAA 1 load 1.0 load 1970-01-01 00:00:00 -489 intlfcstone.com 1970-01-01 2018-09-01 06:00:00 INTL FCStone 1 load 1.0 load 1970-01-01 00:00:00 -490 chsinc.com 1970-01-01 2018-09-01 06:00:00 CHS 1 load 1.0 load 1970-01-01 00:00:00 -491 americanexpress.com 1970-01-01 2018-09-01 06:00:00 American Express 1 load 1.0 load 1970-01-01 00:00:00 -492 gilead.com 1970-01-01 2018-09-01 06:00:00 Gilead Sciences 1 load 1.0 load 1970-01-01 00:00:00 -493 
publix.com 1970-01-01 2018-09-01 06:00:00 Publix Super Markets 1 load 1.0 load 1970-01-01 00:00:00 -494 generaldynamics.com 1970-01-01 2018-09-01 06:00:00 General Dynamics 1 load 1.0 load 1970-01-01 00:00:00 -495 tjx.com 1970-01-01 2018-09-01 06:00:00 TJX 1 load 1.0 load 1970-01-01 00:00:00 -496 conocophillips.com 1970-01-01 2018-09-01 06:00:00 ConocoPhillips 1 load 1.0 load 1970-01-01 00:00:00 -497 att.com 1970-01-01 2018-09-01 06:00:00 AT&T 1 load 1.0 load 1970-01-01 00:00:00 -498 nike.com 1970-01-01 2018-09-01 06:00:00 Nike 1 load 1.0 load 1970-01-01 00:00:00 -499 wfscorp.com 1970-01-01 2018-09-01 06:00:00 World Fuel Services 1 load 1.0 load 1970-01-01 00:00:00 -500 3m.com 1970-01-01 2018-09-01 06:00:00 3M 1 load 1.0 load 1970-01-01 00:00:00 -501 mondelezinternational.com 1970-01-01 2018-09-01 06:00:00 Mondelez International 1 load 1.0 load 1970-01-01 00:00:00 -502 exeloncorp.com 1970-01-01 2018-09-01 06:00:00 Exelon 1 load 1.0 load 1970-01-01 00:00:00 -503 21cf.com 1970-01-01 2018-09-01 06:00:00 Twenty-First Century Fox 1 load 1.0 load 1970-01-01 00:00:00 -504 johndeere.com 1970-01-01 2018-09-01 06:00:00 Deere 1 load 1.0 load 1970-01-01 00:00:00 -505 tsocorp.com 1970-01-01 2018-09-01 06:00:00 Tesoro 1 load 1.0 load 1970-01-01 00:00:00 -506 timewarner.com 1970-01-01 2018-09-01 06:00:00 Time Warner 1 load 1.0 load 1970-01-01 00:00:00 -507 redhat.com 1970-01-01 2018-09-01 06:00:00 Red Hat 1 load 1.0 load 1970-01-01 00:00:00 -509 openwrt.org 1970-01-01 2018-09-01 06:00:00 OpenWRT 1 load 1.0 load 1970-01-01 00:00:00 -510 panasonic.com 1970-01-01 2018-09-01 06:00:00 Panasonic 1 load 1.0 load 1970-01-01 00:00:00 -511 comcast.net 1970-01-01 2018-09-01 06:00:00 Comcast 1 load 1.0 load 1970-01-01 00:00:00 -512 linux.org 1970-01-01 2018-09-01 06:00:00 Linux 1 load 1.0 load 1970-01-01 00:00:00 -514 northwesternmutual.com 1970-01-01 2018-09-01 06:00:00 Northwestern Mutual 1 load 1.0 load 1970-01-01 00:00:00 -515 kde.org 1970-01-01 2018-09-01 06:00:00 KDE 1 load 1.0 load 1970-01-01 00:00:00 -516 twitter.com 1970-01-01 2018-09-01 06:00:00 Twitter 1 load 1.0 load 1970-01-01 00:00:00 -517 adobe.com 1970-01-01 2018-09-01 06:00:00 Adobe 1 load 1.0 load 1970-01-01 00:00:00 -519 acm.org 1970-01-01 2018-09-12 02:01:59 ACM 1 load 1.0 load 1970-01-01 00:00:00 -520 outdoors@acm.org 1970-01-01 2018-09-12 02:32:53 University of Missouri 1 load 1.0 load 2013-07-15 00:00:00 -521 freebsd.org 1970-01-01 2018-09-13 21:15:22 Free BSD 1 load 1.0 load 1970-01-01 00:00:00 -\. - - --- --- Data for Name: contributor_repo; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.contributor_repo (cntrb_repo_id, repo_git, repo_name, gh_repo_id, cntrb_category, event_id, created_at, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. 
- - --- --- Data for Name: contributors; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.contributors (cntrb_login, cntrb_email, cntrb_full_name, cntrb_company, cntrb_created_at, cntrb_type, cntrb_fake, cntrb_deleted, cntrb_long, cntrb_lat, cntrb_country_code, cntrb_state, cntrb_city, cntrb_location, cntrb_canonical, cntrb_last_used, gh_user_id, gh_login, gh_url, gh_html_url, gh_node_id, gh_avatar_url, gh_gravatar_id, gh_followers_url, gh_following_url, gh_gists_url, gh_starred_url, gh_subscriptions_url, gh_organizations_url, gh_repos_url, gh_events_url, gh_received_events_url, gh_type, gh_site_admin, gl_web_url, gl_avatar_url, gl_state, gl_username, gl_full_name, gl_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -not-provided \N \N \N 2019-06-13 11:33:39 \N 0 0 \N \N \N \N \N \N \N \N 1 nobody http://fake.me http://fake.me x http://fake.me \N http://fake.me http://fake.me http://fake.me http://fake.me http://fake.me http://fake.me http://fake.me http://fake.me \N \N \N \N \N \N \N \N \N \N \N \N 2019-06-13 16:35:25 00000000-0000-0000-0000-000000000000 -nan kannayoshihiro@gmail.com KANNA Yoshihiro UTMC 2009-04-17 12:43:58 \N 0 0 \N \N \N \N \N \N kannayoshihiro@gmail.com 2021-01-28 21:56:10-06 74832 nan https://api.github.com/users/nan https://github.com/nan MDQ6VXNlcjc0ODMy https://avatars.githubusercontent.com/u/74832?v=4 https://api.github.com/users/nan/followers https://api.github.com/users/nan/following{/other_user} https://api.github.com/users/nan/gists{/gist_id} https://api.github.com/users/nan/starred{/owner}{/repo} https://api.github.com/users/nan/subscriptions https://api.github.com/users/nan/orgs https://api.github.com/users/nan/repos https://api.github.com/users/nan/events{/privacy} https://api.github.com/users/nan/received_events User false \N \N \N \N \N \N GitHub API Worker 1.0.0 GitHub API 2021-10-28 15:23:46 01000000-0000-0000-0000-000000000000 -\. - - --- --- Data for Name: contributors_aliases; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.contributors_aliases (cntrb_alias_id, canonical_email, alias_email, cntrb_active, cntrb_last_modified, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: discourse_insights; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.discourse_insights (msg_discourse_id, msg_id, discourse_act, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: dm_repo_annual; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_annual (repo_id, email, affiliation, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: dm_repo_group_annual; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_group_annual (repo_group_id, email, affiliation, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: dm_repo_group_monthly; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_group_monthly (repo_group_id, email, affiliation, month, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. 
- - --- --- Data for Name: dm_repo_group_weekly; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_group_weekly (repo_group_id, email, affiliation, week, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: dm_repo_monthly; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_monthly (repo_id, email, affiliation, month, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: dm_repo_weekly; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.dm_repo_weekly (repo_id, email, affiliation, week, year, added, removed, whitespace, files, patches, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: exclude; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.exclude (id, projects_id, email, domain) FROM stdin; -\. - - --- --- Data for Name: issue_assignees; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.issue_assignees (issue_assignee_id, issue_id, repo_id, issue_assignee_src_id, issue_assignee_src_node, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: issue_events; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.issue_events (event_id, issue_id, repo_id, action, action_commit_hash, created_at, node_id, node_url, platform_id, issue_event_src_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: issue_labels; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.issue_labels (issue_label_id, issue_id, repo_id, label_text, label_description, label_color, label_src_id, label_src_node_id, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: issue_message_ref; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.issue_message_ref (issue_msg_ref_id, issue_id, repo_id, msg_id, issue_msg_ref_src_node_id, issue_msg_ref_src_comment_id, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: issues; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.issues (issue_id, repo_id, pull_request, pull_request_id, created_at, issue_title, issue_body, comment_count, updated_at, closed_at, due_on, repository_url, issue_url, labels_url, comments_url, events_url, html_url, issue_state, issue_node_id, gh_issue_number, gh_issue_id, gh_user_id, tool_source, tool_version, data_source, data_collection_date, reporter_id, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: libraries; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.libraries (library_id, repo_id, platform, name, created_timestamp, updated_timestamp, library_description, keywords, library_homepage, license, version_count, latest_release_timestamp, latest_release_number, package_manager_id, dependency_count, dependent_library_count, primary_language, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. 
- - --- --- Data for Name: library_dependencies; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.library_dependencies (lib_dependency_id, library_id, manifest_platform, manifest_filepath, manifest_kind, repo_id_branch, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: library_version; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.library_version (library_version_id, library_id, library_platform, version_number, version_release_date, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: lstm_anomaly_models; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.lstm_anomaly_models (model_id, model_name, model_description, look_back_days, training_days, batch_size, metric, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: lstm_anomaly_results; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.lstm_anomaly_results (result_id, repo_id, repo_category, model_id, metric, contamination_factor, mean_absolute_error, remarks, metric_field, mean_absolute_actual_value, mean_absolute_prediction_value, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: message; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.message (msg_id, rgls_id, platform_msg_id, platform_node_id, repo_id, msg_text, msg_timestamp, msg_sender_email, msg_header, pltfrm_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: message_analysis; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.message_analysis (msg_analysis_id, msg_id, worker_run_id, sentiment_score, reconstruction_error, novelty_flag, feedback_flag, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: message_analysis_summary; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.message_analysis_summary (msg_summary_id, repo_id, worker_run_id, positive_ratio, negative_ratio, novel_count, period, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: message_sentiment; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.message_sentiment (msg_analysis_id, msg_id, worker_run_id, sentiment_score, reconstruction_error, novelty_flag, feedback_flag, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: message_sentiment_summary; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.message_sentiment_summary (msg_summary_id, repo_id, worker_run_id, positive_ratio, negative_ratio, novel_count, period, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: platform; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.platform (pltfrm_id, pltfrm_name, pltfrm_version, pltfrm_release_date, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -1 GitHub 3 2019-06-05 Manual Entry Sean Goggins GitHub 2019-06-05 17:23:42 -0 Unresolved 0 2019-06-05 Manual Entry Sean Goggins GitHub 2022-07-28 20:43:00 -2 GitLab 2 2019-06-05 Manual Entry Sean Goggins GitHub 2022-07-28 20:43:00 -\. 
- - --- --- Data for Name: pull_request_analysis; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_analysis (pull_request_analysis_id, pull_request_id, merge_probability, mechanism, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_request_assignees; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_assignees (pr_assignee_map_id, pull_request_id, repo_id, pr_assignee_src_id, tool_source, tool_version, data_source, data_collection_date, contrib_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_commits; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_commits (pr_cmt_id, pull_request_id, repo_id, pr_cmt_sha, pr_cmt_node_id, pr_cmt_message, pr_cmt_comments_url, pr_cmt_timestamp, pr_cmt_author_email, tool_source, tool_version, data_source, data_collection_date, pr_cmt_author_cntrb_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_events; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_events (pr_event_id, pull_request_id, repo_id, action, action_commit_hash, created_at, issue_event_src_id, node_id, node_url, platform_id, pr_platform_event_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_files; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_files (pr_file_id, pull_request_id, repo_id, pr_file_additions, pr_file_deletions, pr_file_path, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_request_labels; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_labels (pr_label_id, pull_request_id, repo_id, pr_src_id, pr_src_node_id, pr_src_url, pr_src_description, pr_src_color, pr_src_default_bool, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_request_message_ref; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_message_ref (pr_msg_ref_id, pull_request_id, repo_id, msg_id, pr_message_ref_src_comment_id, pr_message_ref_src_node_id, pr_issue_url, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_request_meta; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_meta (pr_repo_meta_id, pull_request_id, repo_id, pr_head_or_base, pr_src_meta_label, pr_src_meta_ref, pr_sha, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_repo; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_repo (pr_repo_id, pr_repo_meta_id, pr_repo_head_or_base, pr_src_repo_id, pr_src_node_id, pr_repo_name, pr_repo_full_name, pr_repo_private_bool, tool_source, tool_version, data_source, data_collection_date, pr_cntrb_id) FROM stdin; -\. 
- - --- --- Data for Name: pull_request_review_message_ref; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_review_message_ref (pr_review_msg_ref_id, pr_review_id, repo_id, msg_id, pr_review_msg_url, pr_review_src_id, pr_review_msg_src_id, pr_review_msg_node_id, pr_review_msg_diff_hunk, pr_review_msg_path, pr_review_msg_position, pr_review_msg_original_position, pr_review_msg_commit_id, pr_review_msg_original_commit_id, pr_review_msg_updated_at, pr_review_msg_html_url, pr_url, pr_review_msg_author_association, pr_review_msg_start_line, pr_review_msg_original_start_line, pr_review_msg_start_side, pr_review_msg_line, pr_review_msg_original_line, pr_review_msg_side, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_request_reviewers; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_reviewers (pr_reviewer_map_id, pull_request_id, pr_source_id, repo_id, pr_reviewer_src_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_reviews; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_reviews (pr_review_id, pull_request_id, repo_id, pr_review_author_association, pr_review_state, pr_review_body, pr_review_submitted_at, pr_review_src_id, pr_review_node_id, pr_review_html_url, pr_review_pull_request_url, pr_review_commit_id, platform_id, tool_source, tool_version, data_source, data_collection_date, cntrb_id) FROM stdin; -\. - - --- --- Data for Name: pull_request_teams; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_request_teams (pr_team_id, pull_request_id, pr_src_team_id, pr_src_team_node, pr_src_team_url, pr_team_name, pr_team_slug, pr_team_description, pr_team_privacy, pr_team_permission, pr_team_src_members_url, pr_team_src_repositories_url, pr_team_parent_id, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: pull_requests; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.pull_requests (pull_request_id, repo_id, pr_url, pr_src_id, pr_src_node_id, pr_html_url, pr_diff_url, pr_patch_url, pr_issue_url, pr_augur_issue_id, pr_src_number, pr_src_state, pr_src_locked, pr_src_title, pr_body, pr_created_at, pr_updated_at, pr_closed_at, pr_merged_at, pr_merge_commit_sha, pr_teams, pr_milestone, pr_commits_url, pr_review_comments_url, pr_review_comment_url, pr_comments_url, pr_statuses_url, pr_meta_head_id, pr_meta_base_id, pr_src_issue_url, pr_src_comments_url, pr_src_review_comments_url, pr_src_commits_url, pr_src_statuses_url, pr_src_author_association, tool_source, tool_version, data_source, data_collection_date, pr_augur_contributor_id) FROM stdin; -\. - - --- --- Data for Name: releases; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.releases (release_id, repo_id, release_name, release_description, release_author, release_created_at, release_published_at, release_updated_at, release_is_draft, release_is_prerelease, release_tag_name, release_url, tag_only, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. 
- - --- --- Data for Name: repo; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo (repo_id, repo_group_id, repo_git, repo_path, repo_name, repo_added, repo_status, repo_type, url, owner_id, description, primary_language, created_at, forked_from, updated_at, repo_archived_date_collected, repo_archived, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -25452 10 https://github.com/chaoss/whitepaper \N \N 2021-04-17 21:40:42 New \N \N \N \N \N Parent not available \N \N 0 CLI 1.0 Git 2021-04-17 21:40:42 -24441 10 https://github.com/operate-first/operate-first-twitter \N \N 2021-08-25 16:47:47 New \N \N \N \N \N Parent not available \N \N 0 CLI 1.0 Git 2021-08-25 16:47:47 -24442 10 https://github.com/operate-first/blueprint \N \N 2021-08-25 16:47:47 New \N \N \N \N \N Parent not available \N \N 0 CLI 1.0 Git 2021-08-25 16:47:47 -25445 10 https://github.com/chaoss/grimoirelab-perceval-opnfv \N \N 2020-04-17 21:40:39 New \N \N \N \N \N Parent not available \N \N 0 CLI 1.0 Git 2021-04-17 21:40:39 -1 1 https://github.com/chaoss/augur \N \N 2021-08-10 14:28:44 New \N \N \N \N \N Parent not available \N \N 0 data load one git 2021-06-05 18:41:14 -25430 10 https://github.com/SociallyCompute/update-test \N \N 2021-10-07 08:50:13 New \N \N \N \N \N Parent not available \N \N 0 \N \N \N \N -25450 10 https://github.com/chaoss/grimoirelab-hatstall \N \N 2021-04-17 21:40:42 New \N \N \N \N \N Parent not available \N \N 0 CLI 1.0 Git 2021-04-17 21:40:42 -\. - - --- --- Data for Name: repo_badging; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_badging (badge_collection_id, repo_id, created_at, tool_source, tool_version, data_source, data_collection_date, data) FROM stdin; -\. - - --- --- Data for Name: repo_cluster_messages; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_cluster_messages (msg_cluster_id, repo_id, cluster_content, cluster_mechanism, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_dependencies; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_dependencies (repo_dependencies_id, repo_id, dep_name, dep_count, dep_language, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_deps_libyear; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_deps_libyear (repo_deps_libyear_id, repo_id, name, requirement, type, package_manager, current_verion, latest_version, current_release_date, latest_release_date, libyear, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_deps_scorecard; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_deps_scorecard (repo_deps_scorecard_id, repo_id, name, status, score, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_group_insights; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_group_insights (rgi_id, repo_group_id, rgi_metric, rgi_value, cms_id, rgi_fresh, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. 
- - --- --- Data for Name: repo_groups; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_groups (repo_group_id, rg_name, rg_description, rg_website, rg_recache, rg_last_modified, rg_type, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -1 Default Repo Group The default repo group created by the schema generation script 0 2019-06-03 15:55:20 GitHub Organization load one git 2019-06-05 13:36:25 -10 Default Repo Group The default repo group created by the schema generation script 0 2021-06-03 15:55:20 GitHub Organization load one git 2019-06-05 13:36:25 -\. - - --- --- Data for Name: repo_groups_list_serve; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_groups_list_serve (rgls_id, repo_group_id, rgls_name, rgls_description, rgls_sponsor, rgls_email, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_info; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_info (repo_info_id, repo_id, last_updated, issues_enabled, open_issues, pull_requests_enabled, wiki_enabled, pages_enabled, fork_count, default_branch, watchers_count, "UUID", license, stars_count, committers_count, issue_contributors_count, changelog_file, contributing_file, license_file, code_of_conduct_file, security_issue_file, security_audit_file, status, keywords, commit_count, issues_count, issues_closed, pull_request_count, pull_requests_open, pull_requests_closed, pull_requests_merged, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_insights; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_insights (ri_id, repo_id, ri_metric, ri_value, ri_date, ri_fresh, tool_source, tool_version, data_source, data_collection_date, ri_score, ri_field, ri_detection_method) FROM stdin; -\. - - --- --- Data for Name: repo_insights_records; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_insights_records (ri_id, repo_id, ri_metric, ri_field, ri_value, ri_date, ri_score, ri_detection_method, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_labor; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_labor (repo_labor_id, repo_id, repo_clone_date, rl_analysis_date, programming_language, file_path, file_name, total_lines, code_lines, comment_lines, blank_lines, code_complexity, repo_url, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_meta; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_meta (repo_id, rmeta_id, rmeta_name, rmeta_value, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_sbom_scans; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_sbom_scans (rsb_id, repo_id, sbom_scan) FROM stdin; -\. - - --- --- Data for Name: repo_stats; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_stats (repo_id, rstat_id, rstat_name, rstat_value, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. 
- - --- --- Data for Name: repo_test_coverage; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_test_coverage (repo_id, repo_clone_date, rtc_analysis_date, programming_language, file_path, file_name, testing_tool, file_statement_count, file_subroutine_count, file_statements_tested, file_subroutines_tested, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repo_topic; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repo_topic (repo_topic_id, repo_id, topic_id, topic_prob, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: repos_fetch_log; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.repos_fetch_log (repos_id, status, date) FROM stdin; -\. - - --- --- Data for Name: settings; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.settings (id, setting, value, last_modified) FROM stdin; -5 report_date committer 2019-05-07 12:47:26 -6 report_attribution author 2019-05-07 12:47:26 -10 google_analytics disabled 2019-05-07 12:47:26 -11 update_frequency 24 2019-05-07 12:47:26 -12 database_version 7 2019-05-07 12:47:26 -13 results_visibility show 2019-05-07 12:47:26 -1 start_date 2001-01-01 1900-01-22 20:34:51 -4 log_level Debug 2019-05-07 12:47:26 -2 repo_directory /augur/repos/ 2019-05-07 12:47:26 -8 affiliations_processed 2001-08-26 10:03:29.815013+00 1900-01-22 20:36:27 -9 aliases_processed 2001-08-26 10:03:29.815013+00 1900-01-22 20:36:27 -7 working_author done 1900-01-22 20:23:43 -3 utility_status Idle 1900-01-22 20:38:07 -\. - - --- --- Data for Name: topic_words; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.topic_words (topic_words_id, topic_id, word, word_prob, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: unknown_cache; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.unknown_cache (type, repo_group_id, email, domain, added, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: unresolved_commit_emails; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.unresolved_commit_emails (email_unresolved_id, email, name, tool_source, tool_version, data_source, data_collection_date) FROM stdin; -\. - - --- --- Data for Name: utility_log; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.utility_log (id, level, status, attempted) FROM stdin; -\. - - --- --- Data for Name: working_commits; Type: TABLE DATA; Schema: augur_data; Owner: augur --- - -COPY augur_data.working_commits (repos_id, working_commit) FROM stdin; -\. - - --- --- Data for Name: all; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations."all" ("Name", "Bytes", "Lines", "Code", "Comment", "Blank", "Complexity", "Count", "WeightedComplexity", "Files") FROM stdin; -\. - - --- --- Data for Name: augur_settings; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.augur_settings (id, setting, value, last_modified) FROM stdin; -1 augur_data_version 100 2021-10-12 08:41:51 -\. 
- - --- --- Data for Name: config; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.config (id, section_name, setting_name, value, type) FROM stdin; -1 Augur developer 0 int -2 Augur version 1 int -5 Facade check_updates 1 int -6 Facade clone_repos 1 int -7 Facade create_xlsx_summary_files 1 int -8 Facade delete_marked_repos 0 int -9 Facade fix_affiliations 1 int -10 Facade force_analysis 1 int -11 Facade force_invalidate_caches 1 int -12 Facade force_updates 1 int -13 Facade limited_run 0 int -14 Facade multithreaded 1 int -15 Facade nuke_stored_affiliations 0 int -16 Facade pull_repos 1 int -17 Facade rebuild_caches 1 int -18 Facade run_analysis 1 int -20 Server cache_expire 3600 str -21 Server host 0.0.0.0 str -22 Server port 5000 int -23 Server workers 6 int -24 Server timeout 6000 int -25 Server ssl false bool -26 Server ssl_cert_file \N \N -27 Server ssl_key_file \N \N -29 Logging log_level INFO str -30 Logging verbose 0 int -31 Logging quiet 0 int -32 Logging debug 0 int -33 Celery concurrency 12 int -34 Redis cache_group 0 int -35 Redis connection_string redis://localhost:6379/ str -19 Facade repo_directory /facade str -28 Logging logs_directory /logs str -3 Keys github_api_key 0 str -4 Keys gitlab_api_key 0 str -\. - - --- --- Data for Name: repos_fetch_log; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.repos_fetch_log (repos_id, status, date) FROM stdin; -\. - - --- --- Data for Name: users; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.users (user_id, login_name, login_hashword, email, text_phone, first_name, last_name, tool_source, tool_version, data_source, data_collection_date, admin) FROM stdin; -\. - - --- --- Data for Name: worker_history; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.worker_history (history_id, repo_id, worker, job_model, oauth_id, "timestamp", status, total_results) FROM stdin; -1 1 workers.repo_info_worker.50723 repo_info 0 2021-10-17 12:05:22 Success 1 -2 1 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:34:30 Success 0 -3 1 workers.github_worker.9396 repo_info 0 2021-12-20 10:34:32 Stopped 0 -4 1 workers.github_worker.9396 issues 0 2021-12-20 10:34:50 Error 0 -5 1 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:34:33 Success 1 -6 1 workers.insight_worker.9082 insights 0 2021-12-20 10:34:32 Success 0 -7 1 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:34:32 Success 1 -8 24441 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:36:31 Success 0 -9 24441 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:36:33 Success 0 -10 24441 workers.github_worker.9396 issues 0 2021-12-20 10:36:34 Error 0 -11 24441 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:36:34 Success 1 -12 24441 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:36:33 Success 0 -13 24441 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:36:33 Stopped 0 -14 24442 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:38:32 Success 0 -15 1 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:39:00 Success 0 -16 24442 workers.github_worker.9396 issues 0 2021-12-20 10:38:35 Error 0 -17 24442 workers.insight_worker.9082 insights 0 2021-12-20 10:38:33 Success 0 -18 24442 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:38:34 Success 1 -19 24442 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:39:00 Stopped 0 -20 25430 
workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:40:32 Success 0 -21 24442 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:40:33 Success 0 -22 25430 workers.insight_worker.9082 insights 0 2021-12-20 10:40:34 Success 0 -23 25430 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:40:34 Success 0 -24 25430 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:40:34 Stopped 0 -25 25430 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:40:35 Success 1 -26 25430 workers.insight_worker.9082 insights 0 2021-12-20 10:50:56 Success 0 -27 1 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:51:26 Success 0 -28 1 workers.github_worker.9396 issues 0 2021-12-20 10:51:14 Error 0 -29 1 workers.release_worker.9488 releases 0 2021-12-20 10:51:01 Success 1 -30 25430 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:50:56 Success 0 -31 25430 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:51:26 Success 0 -32 25430 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:51:26 Stopped 0 -33 24441 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:52:57 Success 0 -34 1 workers.insight_worker.9082 insights 0 2021-12-20 10:52:56 Success 0 -35 1 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:52:58 Success 1 -36 1 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:52:57 Success 1 -37 24441 workers.release_worker.9488 repo_info 0 2021-12-20 10:52:56 Stopped 0 -38 1 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:52:57 Success 0 -39 1 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:52:57 Stopped 0 -40 24441 workers.insight_worker.9082 insights 0 2021-12-20 10:54:59 Success 2 -41 24442 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:54:57 Success 0 -42 24442 workers.github_worker.9396 issues 0 2021-12-20 10:54:58 Error 0 -43 24441 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:54:58 Success 1 -44 24441 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:54:57 Success 0 -45 24441 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:54:57 Stopped 0 -46 25430 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 10:56:56 Success 0 -47 24442 workers.insight_worker.9082 insights 0 2021-12-20 10:56:57 Success 0 -48 24442 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 10:56:56 Success 0 -49 24442 workers.pull_request_worker.9145 repo_info 0 2021-12-20 10:56:56 Stopped 0 -50 24442 workers.repo_info_worker.9176 repo_info 0 2021-12-20 10:56:58 Success 1 -51 24442 workers.linux_badge_worker.9447 badges 0 2021-12-20 10:56:58 Success 0 -52 24442 workers.insight_worker.9082 insights 0 2021-12-20 11:37:29 Success 0 -53 24442 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:37:28 Success 1 -54 1 workers.github_worker.9396 issues 0 2021-12-20 11:38:35 Error 0 -55 1 workers.release_worker.9488 releases 0 2021-12-20 11:37:32 Success 1 -56 24442 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 11:37:27 Success 0 -57 24442 workers.pull_request_worker.9145 repo_info 0 2021-12-20 11:37:27 Stopped 0 -58 1 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 11:38:25 Success 0 -59 25430 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:39:29 Success 1 -60 25430 workers.linux_badge_worker.9447 badges 0 2021-12-20 11:39:28 Success 0 -61 24441 workers.github_worker.9396 issues 0 2021-12-20 11:39:49 Success 0 -62 25430 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 11:39:27 Success 0 
-63 25430 workers.pull_request_worker.9145 repo_info 0 2021-12-20 11:39:27 Stopped 0 -64 24441 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 11:39:47 Success 0 -65 25430 workers.insight_worker.9082 insights 0 2021-12-20 11:39:29 Success 0 -66 1 workers.insight_worker.9082 insights 0 2021-12-20 11:41:32 Success 1 -67 1 workers.linux_badge_worker.9447 badges 0 2021-12-20 11:41:28 Success 1 -68 24442 workers.github_worker.9396 issues 0 2021-12-20 11:42:22 Success 0 -69 1 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:41:30 Success 1 -70 1 workers.pull_request_worker.9145 repo_info 0 2021-12-20 11:41:28 Stopped 0 -71 24442 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 11:42:05 Success 0 -72 24441 workers.insight_worker.9082 insights 0 2021-12-20 11:43:33 Success 1 -73 24441 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:43:29 Success 1 -74 25430 workers.github_worker.9396 issues 0 2021-12-20 11:43:31 Success 0 -75 24441 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 11:43:49 Success 0 -76 24441 workers.pull_request_worker.9145 repo_info 0 2021-12-20 11:43:49 Stopped 0 -77 25430 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 11:44:10 Success 0 -78 24441 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 11:52:46 Success 0 -79 24441 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:52:31 Success 1 -80 24441 workers.github_worker.9396 repo_info 0 2021-12-20 12:05:59 Stopped 0 -81 24442 workers.github_worker.9396 repo_info 0 2021-12-20 12:06:20 Stopped 0 -82 25430 workers.github_worker.9396 repo_info 0 2021-12-20 12:07:08 Stopped 0 -83 25430 workers.github_worker.9396 issues 0 2021-12-20 12:07:11 Success 0 -84 24441 workers.pull_request_worker.9145 repo_info 0 2021-12-20 11:53:12 Stopped 0 -85 24442 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 11:54:45 Success 0 -86 24442 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:54:28 Success 1 -87 24442 workers.linux_badge_worker.9447 badges 0 2021-12-20 11:54:28 Success 0 -88 24441 workers.release_worker.9488 repo_info 0 2021-12-20 11:54:28 Stopped 0 -89 24442 workers.insight_worker.9082 insights 0 2021-12-20 11:54:31 Success 0 -90 24442 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 12:16:33 Success 0 -91 25430 workers.linux_badge_worker.9447 badges 0 2021-12-20 11:56:31 Success 0 -92 25430 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:56:32 Success 1 -93 25430 workers.insight_worker.9082 insights 0 2021-12-20 11:56:34 Success 0 -94 1 workers.linux_badge_worker.9447 badges 0 2021-12-20 11:58:31 Success 1 -95 1 workers.repo_info_worker.9176 repo_info 0 2021-12-20 11:58:33 Success 1 -96 1 workers.insight_worker.9082 insights 0 2021-12-20 11:58:34 Success 0 -97 24442 workers.pull_request_worker.9145 repo_info 0 2021-12-20 12:16:33 Stopped 0 -98 25430 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 12:17:02 Success 0 -99 25430 workers.pull_request_worker.9145 pull_requests 0 2021-12-20 12:17:06 Success 0 -100 25430 workers.pull_request_worker.9145 repo_info 0 2021-12-20 12:17:07 Stopped 0 -101 1 workers.pull_request_worker.9145 pull_request_files 0 2021-12-20 12:17:08 Success 0 -102 1 workers.pull_request_worker.9145 repo_info 0 2021-12-20 12:17:08 Stopped 0 -103 1 workers.insight_worker.9082 insights 0 2021-12-20 12:43:41 Success 0 -104 25430 workers.pull_request_worker.9145 repo_info 0 2021-12-20 12:43:38 Stopped 0 -105 24441 workers.github_worker.9396 repo_info 14 2021-12-20 12:55:23 Stopped 0 
-106 24442 workers.github_worker.9396 repo_info 14 2021-12-20 12:55:44 Stopped 0 -107 25430 workers.github_worker.9396 repo_info 14 2021-12-20 12:56:32 Stopped 0 -108 1 workers.pull_request_worker.9145 pull_requests 1017 2021-12-20 12:58:53 Success 0 -109 24441 workers.insight_worker.9082 insights 0 2021-12-20 12:45:43 Success 1 -110 24441 workers.linux_badge_worker.9447 badges 0 2021-12-20 12:45:39 Success 0 -111 24441 workers.release_worker.9488 repo_info 0 2021-12-20 12:45:38 Stopped 0 -112 24441 workers.repo_info_worker.9176 repo_info 0 2021-12-20 12:45:41 Success 1 -113 24442 workers.insight_worker.9082 insights 0 2021-12-20 12:47:41 Success 0 -114 24442 workers.repo_info_worker.9176 repo_info 1022 2021-12-20 12:47:48 Success 1 -115 24442 workers.linux_badge_worker.9447 badges 0 2021-12-20 12:47:40 Success 0 -116 25430 workers.insight_worker.9082 insights 0 2021-12-20 12:49:41 Success 0 -117 25430 workers.repo_info_worker.9176 repo_info 14 2021-12-20 12:49:48 Success 1 -118 25430 workers.linux_badge_worker.9447 badges 0 2021-12-20 12:49:40 Success 0 -119 1 workers.pull_request_worker.9145 pull_request_files 1017 2021-12-20 13:09:07 Success 0 -120 1 workers.pull_request_worker.9145 repo_info 1017 2021-12-20 13:09:07 Stopped 0 -\. - - --- --- Data for Name: worker_job; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.worker_job (job_model, state, zombie_head, since_id_str, description, last_count, last_run, analysis_state, oauth_id) FROM stdin; -\. - - --- --- Data for Name: worker_oauth; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.worker_oauth (oauth_id, name, consumer_key, consumer_secret, access_token, access_token_secret, repo_directory, platform) FROM stdin; -\. - - --- --- Data for Name: worker_settings_facade; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.worker_settings_facade (id, setting, value, last_modified) FROM stdin; -\. - - --- --- Data for Name: working_commits; Type: TABLE DATA; Schema: augur_operations; Owner: augur --- - -COPY augur_operations.working_commits (repos_id, working_commit) FROM stdin; -\. - - --- --- Data for Name: alembic_version; Type: TABLE DATA; Schema: public; Owner: augur --- - -COPY public.alembic_version (version_num) FROM stdin; -11 -\. - - --- --- Data for Name: annotation_types; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.annotation_types (annotation_type_id, name) FROM stdin; -1 REVIEW -2 OTHER -\. - - --- --- Data for Name: annotations; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.annotations (annotation_id, document_id, annotation_type_id, identifier_id, creator_id, created_ts, comment) FROM stdin; -\. - - --- --- Data for Name: augur_repo_map; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.augur_repo_map (map_id, dosocs_pkg_id, dosocs_pkg_name, repo_id, repo_path) FROM stdin; -\. - - --- --- Data for Name: creator_types; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.creator_types (creator_type_id, name) FROM stdin; -1 Person -2 Organization -3 Tool -\. - - --- --- Data for Name: creators; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.creators (creator_id, creator_type_id, name, email) FROM stdin; -1 3 dosocs2-0.16.1 -\. - - --- --- Data for Name: document_namespaces; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.document_namespaces (document_namespace_id, uri) FROM stdin; -\. 
-
-
---
--- Data for Name: documents; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.documents (document_id, document_namespace_id, data_license_id, spdx_version, name, license_list_version, created_ts, creator_comment, document_comment, package_id) FROM stdin;
-\.
-
-
---
--- Data for Name: documents_creators; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.documents_creators (document_creator_id, document_id, creator_id) FROM stdin;
-\.
-
-
---
--- Data for Name: external_refs; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.external_refs (external_ref_id, document_id, document_namespace_id, id_string, sha256) FROM stdin;
-\.
-
-
---
--- Data for Name: file_contributors; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.file_contributors (file_contributor_id, file_id, contributor) FROM stdin;
-\.
-
-
---
--- Data for Name: file_types; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.file_types (file_type_id, name) FROM stdin;
-4	APPLICATION
-3	ARCHIVE
-5	AUDIO
-2	BINARY
-9	DOCUMENTATION
-6	IMAGE
-11	OTHER
-1	SOURCE
-10	SPDX
-7	TEXT
-8	VIDEO
-\.
-
-
---
--- Data for Name: files; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.files (file_id, file_type_id, sha256, copyright_text, package_id, comment, notice) FROM stdin;
-\.
-
-
---
--- Data for Name: files_licenses; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.files_licenses (file_license_id, file_id, license_id, extracted_text) FROM stdin;
-\.
-
-
---
--- Data for Name: files_scans; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.files_scans (file_scan_id, file_id, scanner_id) FROM stdin;
-\.
-
-
---
--- Data for Name: identifiers; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.identifiers (identifier_id, document_namespace_id, id_string, document_id, package_id, package_file_id) FROM stdin;
-\.
- - --- --- Data for Name: licenses; Type: TABLE DATA; Schema: spdx; Owner: augur --- - -COPY spdx.licenses (license_id, name, short_name, cross_reference, comment, is_spdx_official) FROM stdin; -1 3dfx Glide License Glide http://spdx.org/licenses/Glide.html t -2 Abstyles License Abstyles http://spdx.org/licenses/Abstyles.html t -3 Academic Free License v1.1 AFL-1.1 http://spdx.org/licenses/AFL-1.1.html t -4 Academic Free License v1.2 AFL-1.2 http://spdx.org/licenses/AFL-1.2.html t -5 Academic Free License v2.0 AFL-2.0 http://spdx.org/licenses/AFL-2.0.html t -6 Academic Free License v2.1 AFL-2.1 http://spdx.org/licenses/AFL-2.1.html t -7 Academic Free License v3.0 AFL-3.0 http://spdx.org/licenses/AFL-3.0.html t -8 Academy of Motion Picture Arts and Sciences BSD AMPAS http://spdx.org/licenses/AMPAS.html t -9 Adaptive Public License 1.0 APL-1.0 http://spdx.org/licenses/APL-1.0.html t -10 Adobe Glyph List License Adobe-Glyph http://spdx.org/licenses/Adobe-Glyph.html t -11 Adobe Postscript AFM License APAFML http://spdx.org/licenses/APAFML.html t -12 Adobe Systems Incorporated Source Code License Agreement Adobe-2006 http://spdx.org/licenses/Adobe-2006.html t -13 Affero General Public License v1.0 AGPL-1.0 http://spdx.org/licenses/AGPL-1.0.html t -14 Afmparse License Afmparse http://spdx.org/licenses/Afmparse.html t -15 Aladdin Free Public License Aladdin http://spdx.org/licenses/Aladdin.html t -16 Amazon Digital Services License ADSL http://spdx.org/licenses/ADSL.html t -17 AMD's plpa_map.c License AMDPLPA http://spdx.org/licenses/AMDPLPA.html t -18 ANTLR Software Rights Notice ANTLR-PD http://spdx.org/licenses/ANTLR-PD.html t -19 Apache License 1.0 Apache-1.0 http://spdx.org/licenses/Apache-1.0.html t -20 Apache License 1.1 Apache-1.1 http://spdx.org/licenses/Apache-1.1.html t -21 Apache License 2.0 Apache-2.0 http://spdx.org/licenses/Apache-2.0.html t -22 Apple MIT License AML http://spdx.org/licenses/AML.html t -23 Apple Public Source License 1.0 APSL-1.0 http://spdx.org/licenses/APSL-1.0.html t -24 Apple Public Source License 1.1 APSL-1.1 http://spdx.org/licenses/APSL-1.1.html t -25 Apple Public Source License 1.2 APSL-1.2 http://spdx.org/licenses/APSL-1.2.html t -26 Apple Public Source License 2.0 APSL-2.0 http://spdx.org/licenses/APSL-2.0.html t -27 Artistic License 1.0 Artistic-1.0 http://spdx.org/licenses/Artistic-1.0.html t -28 Artistic License 1.0 (Perl) Artistic-1.0-Perl http://spdx.org/licenses/Artistic-1.0-Perl.html t -29 Artistic License 1.0 w/clause 8 Artistic-1.0-cl8 http://spdx.org/licenses/Artistic-1.0-cl8.html t -30 Artistic License 2.0 Artistic-2.0 http://spdx.org/licenses/Artistic-2.0.html t -31 Attribution Assurance License AAL http://spdx.org/licenses/AAL.html t -32 Bahyph License Bahyph http://spdx.org/licenses/Bahyph.html t -33 Barr License Barr http://spdx.org/licenses/Barr.html t -34 Beerware License Beerware http://spdx.org/licenses/Beerware.html t -35 BitTorrent Open Source License v1.0 BitTorrent-1.0 http://spdx.org/licenses/BitTorrent-1.0.html t -36 BitTorrent Open Source License v1.1 BitTorrent-1.1 http://spdx.org/licenses/BitTorrent-1.1.html t -37 Boost Software License 1.0 BSL-1.0 http://spdx.org/licenses/BSL-1.0.html t -38 Borceux license Borceux http://spdx.org/licenses/Borceux.html t -39 BSD 2-clause "Simplified" License BSD-2-Clause http://spdx.org/licenses/BSD-2-Clause.html t -40 BSD 2-clause FreeBSD License BSD-2-Clause-FreeBSD http://spdx.org/licenses/BSD-2-Clause-FreeBSD.html t -41 BSD 2-clause NetBSD License BSD-2-Clause-NetBSD 
http://spdx.org/licenses/BSD-2-Clause-NetBSD.html t -42 BSD 3-clause "New" or "Revised" License BSD-3-Clause http://spdx.org/licenses/BSD-3-Clause.html t -43 BSD 3-clause Clear License BSD-3-Clause-Clear http://spdx.org/licenses/BSD-3-Clause-Clear.html t -44 BSD 4-clause "Original" or "Old" License BSD-4-Clause http://spdx.org/licenses/BSD-4-Clause.html t -45 BSD Protection License BSD-Protection http://spdx.org/licenses/BSD-Protection.html t -46 BSD with attribution BSD-3-Clause-Attribution http://spdx.org/licenses/BSD-3-Clause-Attribution.html t -47 BSD Zero Clause License 0BSD http://spdx.org/licenses/0BSD.html t -48 BSD-4-Clause (University of California-Specific) BSD-4-Clause-UC http://spdx.org/licenses/BSD-4-Clause-UC.html t -49 bzip2 and libbzip2 License v1.0.5 bzip2-1.0.5 http://spdx.org/licenses/bzip2-1.0.5.html t -50 bzip2 and libbzip2 License v1.0.6 bzip2-1.0.6 http://spdx.org/licenses/bzip2-1.0.6.html t -51 Caldera License Caldera http://spdx.org/licenses/Caldera.html t -52 CeCILL Free Software License Agreement v1.0 CECILL-1.0 http://spdx.org/licenses/CECILL-1.0.html t -53 CeCILL Free Software License Agreement v1.1 CECILL-1.1 http://spdx.org/licenses/CECILL-1.1.html t -54 CeCILL Free Software License Agreement v2.0 CECILL-2.0 http://spdx.org/licenses/CECILL-2.0.html t -55 CeCILL Free Software License Agreement v2.1 CECILL-2.1 http://spdx.org/licenses/CECILL-2.1.html t -56 CeCILL-B Free Software License Agreement CECILL-B http://spdx.org/licenses/CECILL-B.html t -57 CeCILL-C Free Software License Agreement CECILL-C http://spdx.org/licenses/CECILL-C.html t -58 Clarified Artistic License ClArtistic http://spdx.org/licenses/ClArtistic.html t -59 CMU License MIT-CMU http://spdx.org/licenses/MIT-CMU.html t -60 CNRI Jython License CNRI-Jython http://spdx.org/licenses/CNRI-Jython.html t -61 CNRI Python License CNRI-Python http://spdx.org/licenses/CNRI-Python.html t -62 CNRI Python Open Source GPL Compatible License Agreement CNRI-Python-GPL-Compatible http://spdx.org/licenses/CNRI-Python-GPL-Compatible.html t -63 Code Project Open License 1.02 CPOL-1.02 http://spdx.org/licenses/CPOL-1.02.html t -64 Common Development and Distribution License 1.0 CDDL-1.0 http://spdx.org/licenses/CDDL-1.0.html t -65 Common Development and Distribution License 1.1 CDDL-1.1 http://spdx.org/licenses/CDDL-1.1.html t -66 Common Public Attribution License 1.0 CPAL-1.0 http://spdx.org/licenses/CPAL-1.0.html t -67 Common Public License 1.0 CPL-1.0 http://spdx.org/licenses/CPL-1.0.html t -68 Computer Associates Trusted Open Source License 1.1 CATOSL-1.1 http://spdx.org/licenses/CATOSL-1.1.html t -69 Condor Public License v1.1 Condor-1.1 http://spdx.org/licenses/Condor-1.1.html t -70 Creative Commons Attribution 1.0 CC-BY-1.0 http://spdx.org/licenses/CC-BY-1.0.html t -71 Creative Commons Attribution 2.0 CC-BY-2.0 http://spdx.org/licenses/CC-BY-2.0.html t -72 Creative Commons Attribution 2.5 CC-BY-2.5 http://spdx.org/licenses/CC-BY-2.5.html t -73 Creative Commons Attribution 3.0 CC-BY-3.0 http://spdx.org/licenses/CC-BY-3.0.html t -74 Creative Commons Attribution 4.0 CC-BY-4.0 http://spdx.org/licenses/CC-BY-4.0.html t -75 Creative Commons Attribution No Derivatives 1.0 CC-BY-ND-1.0 http://spdx.org/licenses/CC-BY-ND-1.0.html t -76 Creative Commons Attribution No Derivatives 2.0 CC-BY-ND-2.0 http://spdx.org/licenses/CC-BY-ND-2.0.html t -77 Creative Commons Attribution No Derivatives 2.5 CC-BY-ND-2.5 http://spdx.org/licenses/CC-BY-ND-2.5.html t -78 Creative Commons Attribution No Derivatives 3.0 CC-BY-ND-3.0 
http://spdx.org/licenses/CC-BY-ND-3.0.html t -79 Creative Commons Attribution No Derivatives 4.0 CC-BY-ND-4.0 http://spdx.org/licenses/CC-BY-ND-4.0.html t -80 Creative Commons Attribution Non Commercial 1.0 CC-BY-NC-1.0 http://spdx.org/licenses/CC-BY-NC-1.0.html t -81 Creative Commons Attribution Non Commercial 2.0 CC-BY-NC-2.0 http://spdx.org/licenses/CC-BY-NC-2.0.html t -82 Creative Commons Attribution Non Commercial 2.5 CC-BY-NC-2.5 http://spdx.org/licenses/CC-BY-NC-2.5.html t -83 Creative Commons Attribution Non Commercial 3.0 CC-BY-NC-3.0 http://spdx.org/licenses/CC-BY-NC-3.0.html t -84 Creative Commons Attribution Non Commercial 4.0 CC-BY-NC-4.0 http://spdx.org/licenses/CC-BY-NC-4.0.html t -85 Creative Commons Attribution Non Commercial No Derivatives 1.0 CC-BY-NC-ND-1.0 http://spdx.org/licenses/CC-BY-NC-ND-1.0.html t -86 Creative Commons Attribution Non Commercial No Derivatives 2.0 CC-BY-NC-ND-2.0 http://spdx.org/licenses/CC-BY-NC-ND-2.0.html t -87 Creative Commons Attribution Non Commercial No Derivatives 2.5 CC-BY-NC-ND-2.5 http://spdx.org/licenses/CC-BY-NC-ND-2.5.html t -88 Creative Commons Attribution Non Commercial No Derivatives 3.0 CC-BY-NC-ND-3.0 http://spdx.org/licenses/CC-BY-NC-ND-3.0.html t -89 Creative Commons Attribution Non Commercial No Derivatives 4.0 CC-BY-NC-ND-4.0 http://spdx.org/licenses/CC-BY-NC-ND-4.0.html t -90 Creative Commons Attribution Non Commercial Share Alike 1.0 CC-BY-NC-SA-1.0 http://spdx.org/licenses/CC-BY-NC-SA-1.0.html t -91 Creative Commons Attribution Non Commercial Share Alike 2.0 CC-BY-NC-SA-2.0 http://spdx.org/licenses/CC-BY-NC-SA-2.0.html t -92 Creative Commons Attribution Non Commercial Share Alike 2.5 CC-BY-NC-SA-2.5 http://spdx.org/licenses/CC-BY-NC-SA-2.5.html t -93 Creative Commons Attribution Non Commercial Share Alike 3.0 CC-BY-NC-SA-3.0 http://spdx.org/licenses/CC-BY-NC-SA-3.0.html t -94 Creative Commons Attribution Non Commercial Share Alike 4.0 CC-BY-NC-SA-4.0 http://spdx.org/licenses/CC-BY-NC-SA-4.0.html t -95 Creative Commons Attribution Share Alike 1.0 CC-BY-SA-1.0 http://spdx.org/licenses/CC-BY-SA-1.0.html t -96 Creative Commons Attribution Share Alike 2.0 CC-BY-SA-2.0 http://spdx.org/licenses/CC-BY-SA-2.0.html t -97 Creative Commons Attribution Share Alike 2.5 CC-BY-SA-2.5 http://spdx.org/licenses/CC-BY-SA-2.5.html t -98 Creative Commons Attribution Share Alike 3.0 CC-BY-SA-3.0 http://spdx.org/licenses/CC-BY-SA-3.0.html t -99 Creative Commons Attribution Share Alike 4.0 CC-BY-SA-4.0 http://spdx.org/licenses/CC-BY-SA-4.0.html t -100 Creative Commons Zero v1.0 Universal CC0-1.0 http://spdx.org/licenses/CC0-1.0.html t -101 Crossword License Crossword http://spdx.org/licenses/Crossword.html t -102 CrystalStacker License CrystalStacker http://spdx.org/licenses/CrystalStacker.html t -103 CUA Office Public License v1.0 CUA-OPL-1.0 http://spdx.org/licenses/CUA-OPL-1.0.html t -104 Cube License Cube http://spdx.org/licenses/Cube.html t -105 Deutsche Freie Software Lizenz D-FSL-1.0 http://spdx.org/licenses/D-FSL-1.0.html t -106 diffmark license diffmark http://spdx.org/licenses/diffmark.html t -107 Do What The F*ck You Want To Public License WTFPL http://spdx.org/licenses/WTFPL.html t -108 DOC License DOC http://spdx.org/licenses/DOC.html t -109 Dotseqn License Dotseqn http://spdx.org/licenses/Dotseqn.html t -110 DSDP License DSDP http://spdx.org/licenses/DSDP.html t -111 dvipdfm License dvipdfm http://spdx.org/licenses/dvipdfm.html t -112 Eclipse Public License 1.0 EPL-1.0 http://spdx.org/licenses/EPL-1.0.html t -113 Educational 
Community License v1.0 ECL-1.0 http://spdx.org/licenses/ECL-1.0.html t -114 Educational Community License v2.0 ECL-2.0 http://spdx.org/licenses/ECL-2.0.html t -115 eGenix.com Public License 1.1.0 eGenix http://spdx.org/licenses/eGenix.html t -116 Eiffel Forum License v1.0 EFL-1.0 http://spdx.org/licenses/EFL-1.0.html t -117 Eiffel Forum License v2.0 EFL-2.0 http://spdx.org/licenses/EFL-2.0.html t -118 Enlightenment License (e16) MIT-advertising http://spdx.org/licenses/MIT-advertising.html t -119 enna License MIT-enna http://spdx.org/licenses/MIT-enna.html t -120 Entessa Public License v1.0 Entessa http://spdx.org/licenses/Entessa.html t -121 Erlang Public License v1.1 ErlPL-1.1 http://spdx.org/licenses/ErlPL-1.1.html t -122 EU DataGrid Software License EUDatagrid http://spdx.org/licenses/EUDatagrid.html t -123 European Union Public License 1.0 EUPL-1.0 http://spdx.org/licenses/EUPL-1.0.html t -124 European Union Public License 1.1 EUPL-1.1 http://spdx.org/licenses/EUPL-1.1.html t -125 Eurosym License Eurosym http://spdx.org/licenses/Eurosym.html t -126 Fair License Fair http://spdx.org/licenses/Fair.html t -127 feh License MIT-feh http://spdx.org/licenses/MIT-feh.html t -128 Frameworx Open License 1.0 Frameworx-1.0 http://spdx.org/licenses/Frameworx-1.0.html t -129 FreeImage Public License v1.0 FreeImage http://spdx.org/licenses/FreeImage.html t -130 Freetype Project License FTL http://spdx.org/licenses/FTL.html t -131 FSF Unlimited License FSFUL http://spdx.org/licenses/FSFUL.html t -132 FSF Unlimited License (with License Retention) FSFULLR http://spdx.org/licenses/FSFULLR.html t -133 Giftware License Giftware http://spdx.org/licenses/Giftware.html t -134 GL2PS License GL2PS http://spdx.org/licenses/GL2PS.html t -135 Glulxe License Glulxe http://spdx.org/licenses/Glulxe.html t -136 GNU Affero General Public License v3.0 AGPL-3.0 http://spdx.org/licenses/AGPL-3.0.html t -137 GNU Free Documentation License v1.1 GFDL-1.1 http://spdx.org/licenses/GFDL-1.1.html t -138 GNU Free Documentation License v1.2 GFDL-1.2 http://spdx.org/licenses/GFDL-1.2.html t -139 GNU Free Documentation License v1.3 GFDL-1.3 http://spdx.org/licenses/GFDL-1.3.html t -140 GNU General Public License v1.0 only GPL-1.0 http://spdx.org/licenses/GPL-1.0.html t -141 GNU General Public License v2.0 only GPL-2.0 http://spdx.org/licenses/GPL-2.0.html t -142 GNU General Public License v3.0 only GPL-3.0 http://spdx.org/licenses/GPL-3.0.html t -143 GNU Lesser General Public License v2.1 only LGPL-2.1 http://spdx.org/licenses/LGPL-2.1.html t -144 GNU Lesser General Public License v3.0 only LGPL-3.0 http://spdx.org/licenses/LGPL-3.0.html t -145 GNU Library General Public License v2 only LGPL-2.0 http://spdx.org/licenses/LGPL-2.0.html t -146 gnuplot License gnuplot http://spdx.org/licenses/gnuplot.html t -147 gSOAP Public License v1.3b gSOAP-1.3b http://spdx.org/licenses/gSOAP-1.3b.html t -148 Haskell Language Report License HaskellReport http://spdx.org/licenses/HaskellReport.html t -149 Historic Permission Notice and Disclaimer HPND http://spdx.org/licenses/HPND.html t -150 IBM PowerPC Initialization and Boot Software IBM-pibs http://spdx.org/licenses/IBM-pibs.html t -151 IBM Public License v1.0 IPL-1.0 http://spdx.org/licenses/IPL-1.0.html t -152 ICU License ICU http://spdx.org/licenses/ICU.html t -153 ImageMagick License ImageMagick http://spdx.org/licenses/ImageMagick.html t -154 iMatix Standard Function Library Agreement iMatix http://spdx.org/licenses/iMatix.html t -155 Imlib2 License Imlib2 
http://spdx.org/licenses/Imlib2.html t -156 Independent JPEG Group License IJG http://spdx.org/licenses/IJG.html t -157 Intel ACPI Software License Agreement Intel-ACPI http://spdx.org/licenses/Intel-ACPI.html t -158 Intel Open Source License Intel http://spdx.org/licenses/Intel.html t -159 Interbase Public License v1.0 Interbase-1.0 http://spdx.org/licenses/Interbase-1.0.html t -160 IPA Font License IPA http://spdx.org/licenses/IPA.html t -161 ISC License ISC http://spdx.org/licenses/ISC.html t -162 JasPer License JasPer-2.0 http://spdx.org/licenses/JasPer-2.0.html t -163 JSON License JSON http://spdx.org/licenses/JSON.html t -164 LaTeX Project Public License 1.3a LPPL-1.3a http://spdx.org/licenses/LPPL-1.3a.html t -165 LaTeX Project Public License v1.0 LPPL-1.0 http://spdx.org/licenses/LPPL-1.0.html t -166 LaTeX Project Public License v1.1 LPPL-1.1 http://spdx.org/licenses/LPPL-1.1.html t -167 LaTeX Project Public License v1.2 LPPL-1.2 http://spdx.org/licenses/LPPL-1.2.html t -168 LaTeX Project Public License v1.3c LPPL-1.3c http://spdx.org/licenses/LPPL-1.3c.html t -169 Latex2e License Latex2e http://spdx.org/licenses/Latex2e.html t -170 Lawrence Berkeley National Labs BSD variant license BSD-3-Clause-LBNL http://spdx.org/licenses/BSD-3-Clause-LBNL.html t -171 Leptonica License Leptonica http://spdx.org/licenses/Leptonica.html t -172 Lesser General Public License For Linguistic Resources LGPLLR http://spdx.org/licenses/LGPLLR.html t -173 libpng License Libpng http://spdx.org/licenses/Libpng.html t -174 libtiff License libtiff http://spdx.org/licenses/libtiff.html t -175 Lucent Public License v1.02 LPL-1.02 http://spdx.org/licenses/LPL-1.02.html t -176 Lucent Public License Version 1.0 LPL-1.0 http://spdx.org/licenses/LPL-1.0.html t -177 MakeIndex License MakeIndex http://spdx.org/licenses/MakeIndex.html t -178 Matrix Template Library License MTLL http://spdx.org/licenses/MTLL.html t -179 Microsoft Public License MS-PL http://spdx.org/licenses/MS-PL.html t -180 Microsoft Reciprocal License MS-RL http://spdx.org/licenses/MS-RL.html t -181 MirOS Licence MirOS http://spdx.org/licenses/MirOS.html t -182 MIT +no-false-attribs license MITNFA http://spdx.org/licenses/MITNFA.html t -183 MIT License MIT http://spdx.org/licenses/MIT.html t -184 Motosoto License Motosoto http://spdx.org/licenses/Motosoto.html t -185 Mozilla Public License 1.0 MPL-1.0 http://spdx.org/licenses/MPL-1.0.html t -186 Mozilla Public License 1.1 MPL-1.1 http://spdx.org/licenses/MPL-1.1.html t -187 Mozilla Public License 2.0 MPL-2.0 http://spdx.org/licenses/MPL-2.0.html t -188 Mozilla Public License 2.0 (no copyleft exception) MPL-2.0-no-copyleft-exception http://spdx.org/licenses/MPL-2.0-no-copyleft-exception.html t -189 mpich2 License mpich2 http://spdx.org/licenses/mpich2.html t -190 Multics License Multics http://spdx.org/licenses/Multics.html t -191 Mup License Mup http://spdx.org/licenses/Mup.html t -192 NASA Open Source Agreement 1.3 NASA-1.3 http://spdx.org/licenses/NASA-1.3.html t -193 Naumen Public License Naumen http://spdx.org/licenses/Naumen.html t -194 Net Boolean Public License v1 NBPL-1.0 http://spdx.org/licenses/NBPL-1.0.html t -195 NetCDF license NetCDF http://spdx.org/licenses/NetCDF.html t -196 Nethack General Public License NGPL http://spdx.org/licenses/NGPL.html t -197 Netizen Open Source License NOSL http://spdx.org/licenses/NOSL.html t -198 Netscape Public License v1.0 NPL-1.0 http://spdx.org/licenses/NPL-1.0.html t -199 Netscape Public License v1.1 NPL-1.1 http://spdx.org/licenses/NPL-1.1.html t 
-200 Newsletr License Newsletr http://spdx.org/licenses/Newsletr.html t -201 No Limit Public License NLPL http://spdx.org/licenses/NLPL.html t -202 Nokia Open Source License Nokia http://spdx.org/licenses/Nokia.html t -203 Non-Profit Open Software License 3.0 NPOSL-3.0 http://spdx.org/licenses/NPOSL-3.0.html t -204 Noweb License Noweb http://spdx.org/licenses/Noweb.html t -205 NRL License NRL http://spdx.org/licenses/NRL.html t -206 NTP License NTP http://spdx.org/licenses/NTP.html t -207 Nunit License Nunit http://spdx.org/licenses/Nunit.html t -208 OCLC Research Public License 2.0 OCLC-2.0 http://spdx.org/licenses/OCLC-2.0.html t -209 ODC Open Database License v1.0 ODbL-1.0 http://spdx.org/licenses/ODbL-1.0.html t -210 ODC Public Domain Dedication & License 1.0 PDDL-1.0 http://spdx.org/licenses/PDDL-1.0.html t -211 Open Group Test Suite License OGTSL http://spdx.org/licenses/OGTSL.html t -212 Open LDAP Public License 2.2.2 OLDAP-2.2.2 http://spdx.org/licenses/OLDAP-2.2.2.html t -213 Open LDAP Public License v1.1 OLDAP-1.1 http://spdx.org/licenses/OLDAP-1.1.html t -214 Open LDAP Public License v1.2 OLDAP-1.2 http://spdx.org/licenses/OLDAP-1.2.html t -215 Open LDAP Public License v1.3 OLDAP-1.3 http://spdx.org/licenses/OLDAP-1.3.html t -216 Open LDAP Public License v1.4 OLDAP-1.4 http://spdx.org/licenses/OLDAP-1.4.html t -217 Open LDAP Public License v2.0 (or possibly 2.0A and 2.0B) OLDAP-2.0 http://spdx.org/licenses/OLDAP-2.0.html t -218 Open LDAP Public License v2.0.1 OLDAP-2.0.1 http://spdx.org/licenses/OLDAP-2.0.1.html t -219 Open LDAP Public License v2.1 OLDAP-2.1 http://spdx.org/licenses/OLDAP-2.1.html t -220 Open LDAP Public License v2.2 OLDAP-2.2 http://spdx.org/licenses/OLDAP-2.2.html t -221 Open LDAP Public License v2.2.1 OLDAP-2.2.1 http://spdx.org/licenses/OLDAP-2.2.1.html t -222 Open LDAP Public License v2.3 OLDAP-2.3 http://spdx.org/licenses/OLDAP-2.3.html t -223 Open LDAP Public License v2.4 OLDAP-2.4 http://spdx.org/licenses/OLDAP-2.4.html t -224 Open LDAP Public License v2.5 OLDAP-2.5 http://spdx.org/licenses/OLDAP-2.5.html t -225 Open LDAP Public License v2.6 OLDAP-2.6 http://spdx.org/licenses/OLDAP-2.6.html t -226 Open LDAP Public License v2.7 OLDAP-2.7 http://spdx.org/licenses/OLDAP-2.7.html t -227 Open LDAP Public License v2.8 OLDAP-2.8 http://spdx.org/licenses/OLDAP-2.8.html t -228 Open Market License OML http://spdx.org/licenses/OML.html t -229 Open Public License v1.0 OPL-1.0 http://spdx.org/licenses/OPL-1.0.html t -230 Open Software License 1.0 OSL-1.0 http://spdx.org/licenses/OSL-1.0.html t -231 Open Software License 1.1 OSL-1.1 http://spdx.org/licenses/OSL-1.1.html t -232 Open Software License 2.0 OSL-2.0 http://spdx.org/licenses/OSL-2.0.html t -233 Open Software License 2.1 OSL-2.1 http://spdx.org/licenses/OSL-2.1.html t -234 Open Software License 3.0 OSL-3.0 http://spdx.org/licenses/OSL-3.0.html t -235 OpenSSL License OpenSSL http://spdx.org/licenses/OpenSSL.html t -236 PHP License v3.0 PHP-3.0 http://spdx.org/licenses/PHP-3.0.html t -237 PHP License v3.01 PHP-3.01 http://spdx.org/licenses/PHP-3.01.html t -238 Plexus Classworlds License Plexus http://spdx.org/licenses/Plexus.html t -239 PostgreSQL License PostgreSQL http://spdx.org/licenses/PostgreSQL.html t -240 psfrag License psfrag http://spdx.org/licenses/psfrag.html t -241 psutils License psutils http://spdx.org/licenses/psutils.html t -242 Python License 2.0 Python-2.0 http://spdx.org/licenses/Python-2.0.html t -243 Q Public License 1.0 QPL-1.0 http://spdx.org/licenses/QPL-1.0.html t -244 Qhull License 
Qhull http://spdx.org/licenses/Qhull.html t -245 Rdisc License Rdisc http://spdx.org/licenses/Rdisc.html t -246 RealNetworks Public Source License v1.0 RPSL-1.0 http://spdx.org/licenses/RPSL-1.0.html t -247 Reciprocal Public License 1.1 RPL-1.1 http://spdx.org/licenses/RPL-1.1.html t -248 Reciprocal Public License 1.5 RPL-1.5 http://spdx.org/licenses/RPL-1.5.html t -249 Red Hat eCos Public License v1.1 RHeCos-1.1 http://spdx.org/licenses/RHeCos-1.1.html t -250 Ricoh Source Code Public License RSCPL http://spdx.org/licenses/RSCPL.html t -251 RSA Message-Digest License RSA-MD http://spdx.org/licenses/RSA-MD.html t -252 Ruby License Ruby http://spdx.org/licenses/Ruby.html t -253 Sax Public Domain Notice SAX-PD http://spdx.org/licenses/SAX-PD.html t -254 Saxpath License Saxpath http://spdx.org/licenses/Saxpath.html t -255 SCEA Shared Source License SCEA http://spdx.org/licenses/SCEA.html t -256 Scheme Widget Library (SWL) Software License Agreement SWL http://spdx.org/licenses/SWL.html t -257 Sendmail License Sendmail http://spdx.org/licenses/Sendmail.html t -258 SGI Free Software License B v1.0 SGI-B-1.0 http://spdx.org/licenses/SGI-B-1.0.html t -259 SGI Free Software License B v1.1 SGI-B-1.1 http://spdx.org/licenses/SGI-B-1.1.html t -260 SGI Free Software License B v2.0 SGI-B-2.0 http://spdx.org/licenses/SGI-B-2.0.html t -261 SIL Open Font License 1.0 OFL-1.0 http://spdx.org/licenses/OFL-1.0.html t -262 SIL Open Font License 1.1 OFL-1.1 http://spdx.org/licenses/OFL-1.1.html t -263 Simple Public License 2.0 SimPL-2.0 http://spdx.org/licenses/SimPL-2.0.html t -264 Sleepycat License Sleepycat http://spdx.org/licenses/Sleepycat.html t -265 SNIA Public License 1.1 SNIA http://spdx.org/licenses/SNIA.html t -266 Spencer License 86 Spencer-86 http://spdx.org/licenses/Spencer-86.html t -267 Spencer License 94 Spencer-94 http://spdx.org/licenses/Spencer-94.html t -268 Spencer License 99 Spencer-99 http://spdx.org/licenses/Spencer-99.html t -269 Standard ML of New Jersey License SMLNJ http://spdx.org/licenses/SMLNJ.html t -270 SugarCRM Public License v1.1.3 SugarCRM-1.1.3 http://spdx.org/licenses/SugarCRM-1.1.3.html t -271 Sun Industry Standards Source License v1.1 SISSL http://spdx.org/licenses/SISSL.html t -272 Sun Industry Standards Source License v1.2 SISSL-1.2 http://spdx.org/licenses/SISSL-1.2.html t -273 Sun Public License v1.0 SPL-1.0 http://spdx.org/licenses/SPL-1.0.html t -274 Sybase Open Watcom Public License 1.0 Watcom-1.0 http://spdx.org/licenses/Watcom-1.0.html t -275 TCL/TK License TCL http://spdx.org/licenses/TCL.html t -276 The Unlicense Unlicense http://spdx.org/licenses/Unlicense.html t -277 TMate Open Source License TMate http://spdx.org/licenses/TMate.html t -278 TORQUE v2.5+ Software License v1.1 TORQUE-1.1 http://spdx.org/licenses/TORQUE-1.1.html t -279 Trusster Open Source License TOSL http://spdx.org/licenses/TOSL.html t -280 Unicode Terms of Use Unicode-TOU http://spdx.org/licenses/Unicode-TOU.html t -281 Universal Permissive License v1.0 UPL-1.0 http://spdx.org/licenses/UPL-1.0.html t -282 University of Illinois/NCSA Open Source License NCSA http://spdx.org/licenses/NCSA.html t -283 Vim License Vim http://spdx.org/licenses/Vim.html t -284 VOSTROM Public License for Open Source VOSTROM http://spdx.org/licenses/VOSTROM.html t -285 Vovida Software License v1.0 VSL-1.0 http://spdx.org/licenses/VSL-1.0.html t -286 W3C Software Notice and License (1998-07-20) W3C-19980720 http://spdx.org/licenses/W3C-19980720.html t -287 W3C Software Notice and License (2002-12-31) W3C 
http://spdx.org/licenses/W3C.html t -288 Wsuipa License Wsuipa http://spdx.org/licenses/Wsuipa.html t -289 X.Net License Xnet http://spdx.org/licenses/Xnet.html t -290 X11 License X11 http://spdx.org/licenses/X11.html t -291 Xerox License Xerox http://spdx.org/licenses/Xerox.html t -292 XFree86 License 1.1 XFree86-1.1 http://spdx.org/licenses/XFree86-1.1.html t -293 xinetd License xinetd http://spdx.org/licenses/xinetd.html t -294 XPP License xpp http://spdx.org/licenses/xpp.html t -295 XSkat License XSkat http://spdx.org/licenses/XSkat.html t -296 Yahoo! Public License v1.0 YPL-1.0 http://spdx.org/licenses/YPL-1.0.html t -297 Yahoo! Public License v1.1 YPL-1.1 http://spdx.org/licenses/YPL-1.1.html t -298 Zed License Zed http://spdx.org/licenses/Zed.html t -299 Zend License v2.0 Zend-2.0 http://spdx.org/licenses/Zend-2.0.html t -300 Zimbra Public License v1.3 Zimbra-1.3 http://spdx.org/licenses/Zimbra-1.3.html t -301 Zimbra Public License v1.4 Zimbra-1.4 http://spdx.org/licenses/Zimbra-1.4.html t -302 zlib License Zlib http://spdx.org/licenses/Zlib.html t -303 zlib/libpng License with Acknowledgement zlib-acknowledgement http://spdx.org/licenses/zlib-acknowledgement.html t -304 Zope Public License 1.1 ZPL-1.1 http://spdx.org/licenses/ZPL-1.1.html t -305 Zope Public License 2.0 ZPL-2.0 http://spdx.org/licenses/ZPL-2.0.html t -306 Zope Public License 2.1 ZPL-2.1 http://spdx.org/licenses/ZPL-2.1.html t -307 eCos license version 2.0 eCos-2.0 http://spdx.org/licenses/eCos-2.0 t -308 GNU General Public License v1.0 or later GPL-1.0+ http://spdx.org/licenses/GPL-1.0+ t -309 GNU General Public License v2.0 or later GPL-2.0+ http://spdx.org/licenses/GPL-2.0+ t -310 GNU General Public License v2.0 w/Autoconf exception GPL-2.0-with-autoconf-exception http://spdx.org/licenses/GPL-2.0-with-autoconf-exception t -311 GNU General Public License v2.0 w/Bison exception GPL-2.0-with-bison-exception http://spdx.org/licenses/GPL-2.0-with-bison-exception t -312 GNU General Public License v2.0 w/Classpath exception GPL-2.0-with-classpath-exception http://spdx.org/licenses/GPL-2.0-with-classpath-exception t -313 GNU General Public License v2.0 w/Font exception GPL-2.0-with-font-exception http://spdx.org/licenses/GPL-2.0-with-font-exception t -314 GNU General Public License v2.0 w/GCC Runtime Library exception GPL-2.0-with-GCC-exception http://spdx.org/licenses/GPL-2.0-with-GCC-exception t -315 GNU General Public License v3.0 or later GPL-3.0+ http://spdx.org/licenses/GPL-3.0+ t -316 GNU General Public License v3.0 w/Autoconf exception GPL-3.0-with-autoconf-exception http://spdx.org/licenses/GPL-3.0-with-autoconf-exception t -317 GNU General Public License v3.0 w/GCC Runtime Library exception GPL-3.0-with-GCC-exception http://spdx.org/licenses/GPL-3.0-with-GCC-exception t -318 GNU Lesser General Public License v2.1 or later LGPL-2.1+ http://spdx.org/licenses/LGPL-2.1+ t -319 GNU Lesser General Public License v3.0 or later LGPL-3.0+ http://spdx.org/licenses/LGPL-3.0+ t -320 GNU Library General Public License v2 or later LGPL-2.0+ http://spdx.org/licenses/LGPL-2.0+ t -321 Standard ML of New Jersey License StandardML-NJ http://spdx.org/licenses/StandardML-NJ t -322 wxWindows Library License WXwindows http://spdx.org/licenses/WXwindows t -\. 
-
-
---
--- Data for Name: packages; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.packages (package_id, name, version, file_name, supplier_id, originator_id, download_location, verification_code, ver_code_excluded_file_id, sha256, home_page, source_info, concluded_license_id, declared_license_id, license_comment, copyright_text, summary, description, comment, dosocs2_dir_code) FROM stdin;
-\.
-
-
---
--- Data for Name: packages_files; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.packages_files (package_file_id, package_id, file_id, concluded_license_id, license_comment, file_name) FROM stdin;
-\.
-
-
---
--- Data for Name: packages_scans; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.packages_scans (package_scan_id, package_id, scanner_id) FROM stdin;
-\.
-
-
---
--- Data for Name: projects; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.projects (package_id, name, homepage, uri) FROM stdin;
-\.
-
-
---
--- Data for Name: relationship_types; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.relationship_types (relationship_type_id, name) FROM stdin;
-1	DESCRIBES
-2	DESCRIBED_BY
-3	CONTAINS
-4	CONTAINED_BY
-5	GENERATES
-6	GENERATED_FROM
-7	ANCESTOR_OF
-8	DESCENDANT_OF
-9	VARIANT_OF
-10	DISTRIBUTION_ARTIFACT
-11	PATCH_FOR
-12	PATCH_APPLIED
-13	COPY_OF
-14	FILE_ADDED
-15	FILE_DELETED
-16	FILE_MODIFIED
-17	EXPANDED_FROM_ARCHIVE
-18	DYNAMIC_LINK
-19	STATIC_LINK
-20	DATA_FILE_OF
-21	TEST_CASE_OF
-22	BUILD_TOOL_OF
-23	DOCUMENTATION_OF
-24	OPTIONAL_COMPONENT_OF
-25	METAFILE_OF
-26	PACKAGE_OF
-27	AMENDS
-28	PREREQUISITE_FOR
-29	HAS_PREREQUISITE
-30	OTHER
-\.
-
-
---
--- Data for Name: relationships; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.relationships (relationship_id, left_identifier_id, right_identifier_id, relationship_type_id, relationship_comment) FROM stdin;
-\.
-
-
---
--- Data for Name: sbom_scans; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.sbom_scans (repo_id, sbom_scan) FROM stdin;
-\.
-
-
---
--- Data for Name: scanners; Type: TABLE DATA; Schema: spdx; Owner: augur
---
-
-COPY spdx.scanners (scanner_id, name) FROM stdin;
-\.
- - --- --- Name: augur_data.repo_insights_ri_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data."augur_data.repo_insights_ri_id_seq"', 25430, false); - - --- --- Name: chaoss_metric_status_cms_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.chaoss_metric_status_cms_id_seq', 1, false); - - --- --- Name: chaoss_user_chaoss_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.chaoss_user_chaoss_id_seq', 1, false); - - --- --- Name: commit_comment_ref_cmt_comment_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.commit_comment_ref_cmt_comment_id_seq', 25430, false); - - --- --- Name: commit_parents_parent_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.commit_parents_parent_id_seq', 25430, false); - - --- --- Name: commits_cmt_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.commits_cmt_id_seq', 25430, false); - - --- --- Name: contributor_affiliations_ca_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributor_affiliations_ca_id_seq', 25430, false); - - --- --- Name: contributor_repo_cntrb_repo_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributor_repo_cntrb_repo_id_seq', 1, false); - - --- --- Name: contributors_aliases_cntrb_a_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributors_aliases_cntrb_a_id_seq', 25430, false); - - --- --- Name: contributors_aliases_cntrb_alias_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributors_aliases_cntrb_alias_id_seq', 1, false); - - --- --- Name: contributors_cntrb_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributors_cntrb_id_seq', 25430, false); - - --- --- Name: contributors_history_cntrb_history_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.contributors_history_cntrb_history_id_seq', 25430, false); - - --- --- Name: discourse_insights_msg_discourse_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.discourse_insights_msg_discourse_id_seq', 1, false); - - --- --- Name: discourse_insights_msg_discourse_id_seq1; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.discourse_insights_msg_discourse_id_seq1', 1, false); - - --- --- Name: issue_assignees_issue_assignee_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.issue_assignees_issue_assignee_id_seq', 1, false); - - --- --- Name: issue_events_event_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.issue_events_event_id_seq', 25430, false); - - --- --- Name: issue_labels_issue_label_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.issue_labels_issue_label_id_seq', 25430, false); - - --- --- Name: issue_message_ref_issue_msg_ref_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT 
pg_catalog.setval('augur_data.issue_message_ref_issue_msg_ref_id_seq', 25430, false); - - --- --- Name: issue_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.issue_seq', 31000, false); - - --- --- Name: libraries_library_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.libraries_library_id_seq', 25430, false); - - --- --- Name: library_dependencies_lib_dependency_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.library_dependencies_lib_dependency_id_seq', 25430, false); - - --- --- Name: library_version_library_version_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.library_version_library_version_id_seq', 25430, false); - - --- --- Name: lstm_anomaly_models_model_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.lstm_anomaly_models_model_id_seq', 1, false); - - --- --- Name: lstm_anomaly_results_result_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.lstm_anomaly_results_result_id_seq', 1, false); - - --- --- Name: message_analysis_msg_analysis_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.message_analysis_msg_analysis_id_seq', 1, false); - - --- --- Name: message_analysis_summary_msg_summary_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.message_analysis_summary_msg_summary_id_seq', 1, false); - - --- --- Name: message_msg_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.message_msg_id_seq', 25430, false); - - --- --- Name: message_sentiment_msg_analysis_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.message_sentiment_msg_analysis_id_seq', 1, false); - - --- --- Name: message_sentiment_summary_msg_summary_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.message_sentiment_summary_msg_summary_id_seq', 1, false); - - --- --- Name: platform_pltfrm_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.platform_pltfrm_id_seq', 25430, false); - - --- --- Name: pull_request_analysis_pull_request_analysis_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_analysis_pull_request_analysis_id_seq', 1, false); - - --- --- Name: pull_request_assignees_pr_assignee_map_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_assignees_pr_assignee_map_id_seq', 25430, false); - - --- --- Name: pull_request_commits_pr_cmt_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_commits_pr_cmt_id_seq', 1, false); - - --- --- Name: pull_request_events_pr_event_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_events_pr_event_id_seq', 25430, false); - - --- --- Name: pull_request_files_pr_file_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_files_pr_file_id_seq', 25150, false); - - --- --- Name: 
pull_request_labels_pr_label_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_labels_pr_label_id_seq', 25430, false); - - --- --- Name: pull_request_message_ref_pr_msg_ref_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_message_ref_pr_msg_ref_id_seq', 25430, false); - - --- --- Name: pull_request_meta_pr_repo_meta_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_meta_pr_repo_meta_id_seq', 25430, false); - - --- --- Name: pull_request_repo_pr_repo_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_repo_pr_repo_id_seq', 25430, false); - - --- --- Name: pull_request_review_message_ref_pr_review_msg_ref_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_review_message_ref_pr_review_msg_ref_id_seq', 1, false); - - --- --- Name: pull_request_reviewers_pr_reviewer_map_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_reviewers_pr_reviewer_map_id_seq', 25430, false); - - --- --- Name: pull_request_reviews_pr_review_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_reviews_pr_review_id_seq', 1, false); - - --- --- Name: pull_request_teams_pr_team_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_request_teams_pr_team_id_seq', 25430, false); - - --- --- Name: pull_requests_pull_request_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.pull_requests_pull_request_id_seq', 25430, false); - - --- --- Name: releases_release_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.releases_release_id_seq', 1, false); - - --- --- Name: repo_badging_badge_collection_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_badging_badge_collection_id_seq', 25012, false); - - --- --- Name: repo_cluster_messages_msg_cluster_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_cluster_messages_msg_cluster_id_seq', 1, false); - - --- --- Name: repo_dependencies_repo_dependencies_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_dependencies_repo_dependencies_id_seq', 1, false); - - --- --- Name: repo_deps_libyear_repo_deps_libyear_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_deps_libyear_repo_deps_libyear_id_seq', 1, false); - - --- --- Name: repo_deps_scorecard_repo_deps_scorecard_id_seq1; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_deps_scorecard_repo_deps_scorecard_id_seq1', 1, false); - - --- --- Name: repo_group_insights_rgi_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_group_insights_rgi_id_seq', 25430, false); - - --- --- Name: repo_groups_list_serve_rgls_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_groups_list_serve_rgls_id_seq', 25430, false); - - --- --- Name: 
repo_groups_repo_group_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_groups_repo_group_id_seq', 25430, false); - - --- --- Name: repo_info_repo_info_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_info_repo_info_id_seq', 25430, false); - - --- --- Name: repo_insights_records_ri_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_insights_records_ri_id_seq', 1, false); - - --- --- Name: repo_insights_ri_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_insights_ri_id_seq', 1, false); - - --- --- Name: repo_labor_repo_labor_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_labor_repo_labor_id_seq', 25430, false); - - --- --- Name: repo_meta_rmeta_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_meta_rmeta_id_seq', 25430, false); - - --- --- Name: repo_repo_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_repo_id_seq', 25430, false); - - --- --- Name: repo_sbom_scans_rsb_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_sbom_scans_rsb_id_seq', 25430, false); - - --- --- Name: repo_stats_rstat_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_stats_rstat_id_seq', 25430, false); - - --- --- Name: repo_test_coverage_repo_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_test_coverage_repo_id_seq', 1, false); - - --- --- Name: repo_topic_repo_topic_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.repo_topic_repo_topic_id_seq', 1, false); - - --- --- Name: topic_words_topic_words_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.topic_words_topic_words_id_seq', 1, false); - - --- --- Name: unresolved_commit_emails_email_unresolved_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.unresolved_commit_emails_email_unresolved_id_seq', 1, false); - - --- --- Name: utility_log_id_seq; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.utility_log_id_seq', 1, false); - - --- --- Name: utility_log_id_seq1; Type: SEQUENCE SET; Schema: augur_data; Owner: augur --- - -SELECT pg_catalog.setval('augur_data.utility_log_id_seq1', 1, false); - - --- --- Name: affiliations_corp_id_seq; Type: SEQUENCE SET; Schema: augur_operations; Owner: augur --- - -SELECT pg_catalog.setval('augur_operations.affiliations_corp_id_seq', 620000, false); - - --- --- Name: augur_settings_id_seq; Type: SEQUENCE SET; Schema: augur_operations; Owner: augur --- - -SELECT pg_catalog.setval('augur_operations.augur_settings_id_seq', 1, false); - - --- --- Name: config_id_seq; Type: SEQUENCE SET; Schema: augur_operations; Owner: augur --- - -SELECT pg_catalog.setval('augur_operations.config_id_seq', 35, true); - - --- --- Name: gh_worker_history_history_id_seq; Type: SEQUENCE SET; Schema: augur_operations; Owner: augur --- - -SELECT pg_catalog.setval('augur_operations.gh_worker_history_history_id_seq', 15000, false); - - --- --- Name: 
[Removed pg_dump schema DDL: SEQUENCE SET (pg_catalog.setval) statements for the augur_operations and spdx sequences; PRIMARY KEY and UNIQUE constraints, with their COMMENT ON CONSTRAINT notes, for tables in the augur_data, augur_operations, spdx, and public (alembic_version) schemas; btree, hash, brin, and unique indexes on augur_data and augur_operations tables; and FOREIGN KEY constraints on the augur_data tables (contributor_repo, contributors_aliases, commits, commit_comment_ref, commit_parents, issues and issue_* tables, message and message_* tables, libraries, lstm_anomaly_*, pull_requests and pull_request_* tables, releases, and related reference tables).]
-ALTER TABLE ONLY augur_data.releases - ADD CONSTRAINT fk_releases_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_badging fk_repo_badging_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_badging - ADD CONSTRAINT fk_repo_badging_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_cluster_messages fk_repo_cluster_messages_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_cluster_messages - ADD CONSTRAINT fk_repo_cluster_messages_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_group_insights fk_repo_group_insights_repo_groups_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_group_insights - ADD CONSTRAINT fk_repo_group_insights_repo_groups_1 FOREIGN KEY (repo_group_id) REFERENCES augur_data.repo_groups(repo_group_id); - - --- --- Name: repo_groups_list_serve fk_repo_groups_list_serve_repo_groups_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_groups_list_serve - ADD CONSTRAINT fk_repo_groups_list_serve_repo_groups_1 FOREIGN KEY (repo_group_id) REFERENCES augur_data.repo_groups(repo_group_id); - - --- --- Name: issue_message_ref fk_repo_id_fk1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.issue_message_ref - ADD CONSTRAINT fk_repo_id_fk1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id) ON UPDATE CASCADE ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED; - - --- --- Name: repo_info fk_repo_info_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_info - ADD CONSTRAINT fk_repo_info_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_insights fk_repo_insights_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_insights - ADD CONSTRAINT fk_repo_insights_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_labor fk_repo_labor_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_labor - ADD CONSTRAINT fk_repo_labor_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_meta fk_repo_meta_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_meta - ADD CONSTRAINT fk_repo_meta_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo fk_repo_repo_groups_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo - ADD CONSTRAINT fk_repo_repo_groups_1 FOREIGN KEY (repo_group_id) REFERENCES augur_data.repo_groups(repo_group_id); - - --- --- Name: CONSTRAINT fk_repo_repo_groups_1 ON repo; Type: COMMENT; Schema: augur_data; Owner: augur --- - -COMMENT ON CONSTRAINT fk_repo_repo_groups_1 ON augur_data.repo IS 'Repo_groups cardinality set to one and only one because, although in theory there could be more than one repo group for a repo, this might create dependencies in hosted situation that we do not want to live with. 
'; - - --- --- Name: pull_request_reviews fk_repo_review; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_reviews - ADD CONSTRAINT fk_repo_review FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id) ON UPDATE CASCADE ON DELETE RESTRICT; - - --- --- Name: repo_stats fk_repo_stats_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_stats - ADD CONSTRAINT fk_repo_stats_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_test_coverage fk_repo_test_coverage_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_test_coverage - ADD CONSTRAINT fk_repo_test_coverage_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_topic fk_repo_topic_repo_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_topic - ADD CONSTRAINT fk_repo_topic_repo_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: pull_request_review_message_ref fk_review_repo; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_review_message_ref - ADD CONSTRAINT fk_review_repo FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id) ON UPDATE CASCADE ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED; - - --- --- Name: pull_request_events fkpr_platform; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_events - ADD CONSTRAINT fkpr_platform FOREIGN KEY (platform_id) REFERENCES augur_data.platform(pltfrm_id) ON UPDATE RESTRICT ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED; - - --- --- Name: pull_request_events fkprevent_repo_id; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_events - ADD CONSTRAINT fkprevent_repo_id FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id) ON UPDATE RESTRICT ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED; - - --- --- Name: issue_assignees issue_assignees_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.issue_assignees - ADD CONSTRAINT issue_assignees_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: issue_events issue_events_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.issue_events - ADD CONSTRAINT issue_events_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE RESTRICT; - - --- --- Name: issues issues_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.issues - ADD CONSTRAINT issues_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: issues issues_reporter_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.issues - ADD CONSTRAINT issues_reporter_id_fkey FOREIGN KEY (reporter_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: message message_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.message - ADD CONSTRAINT message_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE CASCADE; - - --- --- Name: pull_request_assignees pull_request_assignees_contrib_id_fkey; 
Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_assignees - ADD CONSTRAINT pull_request_assignees_contrib_id_fkey FOREIGN KEY (contrib_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: pull_request_commits pull_request_commits_pr_cmt_author_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_commits - ADD CONSTRAINT pull_request_commits_pr_cmt_author_cntrb_id_fkey FOREIGN KEY (pr_cmt_author_cntrb_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE CASCADE; - - --- --- Name: pull_request_events pull_request_events_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_events - ADD CONSTRAINT pull_request_events_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: pull_request_meta pull_request_meta_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_meta - ADD CONSTRAINT pull_request_meta_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: pull_request_repo pull_request_repo_pr_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_repo - ADD CONSTRAINT pull_request_repo_pr_cntrb_id_fkey FOREIGN KEY (pr_cntrb_id) REFERENCES augur_data.contributors(cntrb_id); - - --- --- Name: pull_request_reviewers pull_request_reviewers_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_reviewers - ADD CONSTRAINT pull_request_reviewers_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE CASCADE; - - --- --- Name: pull_request_reviews pull_request_reviews_cntrb_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_request_reviews - ADD CONSTRAINT pull_request_reviews_cntrb_id_fkey FOREIGN KEY (cntrb_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE RESTRICT; - - --- --- Name: pull_requests pull_requests_pr_augur_contributor_id_fkey; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.pull_requests - ADD CONSTRAINT pull_requests_pr_augur_contributor_id_fkey FOREIGN KEY (pr_augur_contributor_id) REFERENCES augur_data.contributors(cntrb_id) ON UPDATE CASCADE ON DELETE RESTRICT; - - --- --- Name: repo_dependencies repo_id; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_dependencies - ADD CONSTRAINT repo_id FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_deps_scorecard repo_id_copy_1; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_deps_scorecard - ADD CONSTRAINT repo_id_copy_1 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_deps_libyear repo_id_copy_2; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_deps_libyear - ADD CONSTRAINT repo_id_copy_2 FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id); - - --- --- Name: repo_insights_records repo_id_ref; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_insights_records - ADD CONSTRAINT repo_id_ref FOREIGN KEY (repo_id) REFERENCES 
augur_data.repo(repo_id) ON UPDATE CASCADE ON DELETE SET NULL; - - --- --- Name: repo_sbom_scans repo_linker_sbom; Type: FK CONSTRAINT; Schema: augur_data; Owner: augur --- - -ALTER TABLE ONLY augur_data.repo_sbom_scans - ADD CONSTRAINT repo_linker_sbom FOREIGN KEY (repo_id) REFERENCES augur_data.repo(repo_id) ON UPDATE CASCADE ON DELETE CASCADE; - - --- --- Name: annotations annotations_annotation_type_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.annotations - ADD CONSTRAINT annotations_annotation_type_id_fkey FOREIGN KEY (annotation_type_id) REFERENCES spdx.annotation_types(annotation_type_id); - - --- --- Name: annotations annotations_creator_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.annotations - ADD CONSTRAINT annotations_creator_id_fkey FOREIGN KEY (creator_id) REFERENCES spdx.creators(creator_id); - - --- --- Name: annotations annotations_document_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.annotations - ADD CONSTRAINT annotations_document_id_fkey FOREIGN KEY (document_id) REFERENCES spdx.documents(document_id); - - --- --- Name: annotations annotations_identifier_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.annotations - ADD CONSTRAINT annotations_identifier_id_fkey FOREIGN KEY (identifier_id) REFERENCES spdx.identifiers(identifier_id); - - --- --- Name: creators creators_creator_type_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.creators - ADD CONSTRAINT creators_creator_type_id_fkey FOREIGN KEY (creator_type_id) REFERENCES spdx.creator_types(creator_type_id); - - --- --- Name: documents_creators documents_creators_creator_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.documents_creators - ADD CONSTRAINT documents_creators_creator_id_fkey FOREIGN KEY (creator_id) REFERENCES spdx.creators(creator_id); - - --- --- Name: documents_creators documents_creators_document_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.documents_creators - ADD CONSTRAINT documents_creators_document_id_fkey FOREIGN KEY (document_id) REFERENCES spdx.documents(document_id); - - --- --- Name: documents documents_data_license_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.documents - ADD CONSTRAINT documents_data_license_id_fkey FOREIGN KEY (data_license_id) REFERENCES spdx.licenses(license_id); - - --- --- Name: documents documents_document_namespace_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.documents - ADD CONSTRAINT documents_document_namespace_id_fkey FOREIGN KEY (document_namespace_id) REFERENCES spdx.document_namespaces(document_namespace_id); - - --- --- Name: documents documents_package_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.documents - ADD CONSTRAINT documents_package_id_fkey FOREIGN KEY (package_id) REFERENCES spdx.packages(package_id); - - --- --- Name: external_refs external_refs_document_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.external_refs - ADD CONSTRAINT external_refs_document_id_fkey FOREIGN KEY (document_id) REFERENCES spdx.documents(document_id); - - --- --- Name: external_refs external_refs_document_namespace_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.external_refs - ADD CONSTRAINT 
external_refs_document_namespace_id_fkey FOREIGN KEY (document_namespace_id) REFERENCES spdx.document_namespaces(document_namespace_id); - - --- --- Name: file_contributors file_contributors_file_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.file_contributors - ADD CONSTRAINT file_contributors_file_id_fkey FOREIGN KEY (file_id) REFERENCES spdx.files(file_id); - - --- --- Name: files_licenses files_licenses_file_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.files_licenses - ADD CONSTRAINT files_licenses_file_id_fkey FOREIGN KEY (file_id) REFERENCES spdx.files(file_id); - - --- --- Name: files_licenses files_licenses_license_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.files_licenses - ADD CONSTRAINT files_licenses_license_id_fkey FOREIGN KEY (license_id) REFERENCES spdx.licenses(license_id); - - --- --- Name: files_scans files_scans_file_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.files_scans - ADD CONSTRAINT files_scans_file_id_fkey FOREIGN KEY (file_id) REFERENCES spdx.files(file_id); - - --- --- Name: files_scans files_scans_scanner_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.files_scans - ADD CONSTRAINT files_scans_scanner_id_fkey FOREIGN KEY (scanner_id) REFERENCES spdx.scanners(scanner_id); - - --- --- Name: packages_files fk_package_files_packages; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages_files - ADD CONSTRAINT fk_package_files_packages FOREIGN KEY (package_id) REFERENCES spdx.packages(package_id); - - --- --- Name: packages fk_package_packages_files; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages - ADD CONSTRAINT fk_package_packages_files FOREIGN KEY (ver_code_excluded_file_id) REFERENCES spdx.packages_files(package_file_id); - - --- --- Name: identifiers identifiers_document_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.identifiers - ADD CONSTRAINT identifiers_document_id_fkey FOREIGN KEY (document_id) REFERENCES spdx.documents(document_id); - - --- --- Name: identifiers identifiers_document_namespace_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.identifiers - ADD CONSTRAINT identifiers_document_namespace_id_fkey FOREIGN KEY (document_namespace_id) REFERENCES spdx.document_namespaces(document_namespace_id); - - --- --- Name: identifiers identifiers_package_file_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.identifiers - ADD CONSTRAINT identifiers_package_file_id_fkey FOREIGN KEY (package_file_id) REFERENCES spdx.packages_files(package_file_id); - - --- --- Name: identifiers identifiers_package_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.identifiers - ADD CONSTRAINT identifiers_package_id_fkey FOREIGN KEY (package_id) REFERENCES spdx.packages(package_id); - - --- --- Name: packages packages_concluded_license_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages - ADD CONSTRAINT packages_concluded_license_id_fkey FOREIGN KEY (concluded_license_id) REFERENCES spdx.licenses(license_id); - - --- --- Name: packages packages_declared_license_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages - ADD CONSTRAINT packages_declared_license_id_fkey FOREIGN KEY 
(declared_license_id) REFERENCES spdx.licenses(license_id); - - --- --- Name: packages_files packages_files_concluded_license_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages_files - ADD CONSTRAINT packages_files_concluded_license_id_fkey FOREIGN KEY (concluded_license_id) REFERENCES spdx.licenses(license_id); - - --- --- Name: packages_files packages_files_file_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages_files - ADD CONSTRAINT packages_files_file_id_fkey FOREIGN KEY (file_id) REFERENCES spdx.files(file_id); - - --- --- Name: packages packages_originator_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages - ADD CONSTRAINT packages_originator_id_fkey FOREIGN KEY (originator_id) REFERENCES spdx.creators(creator_id); - - --- --- Name: packages_scans packages_scans_package_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages_scans - ADD CONSTRAINT packages_scans_package_id_fkey FOREIGN KEY (package_id) REFERENCES spdx.packages(package_id); - - --- --- Name: packages_scans packages_scans_scanner_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages_scans - ADD CONSTRAINT packages_scans_scanner_id_fkey FOREIGN KEY (scanner_id) REFERENCES spdx.scanners(scanner_id); - - --- --- Name: packages packages_supplier_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.packages - ADD CONSTRAINT packages_supplier_id_fkey FOREIGN KEY (supplier_id) REFERENCES spdx.creators(creator_id); - - --- --- Name: relationships relationships_left_identifier_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.relationships - ADD CONSTRAINT relationships_left_identifier_id_fkey FOREIGN KEY (left_identifier_id) REFERENCES spdx.identifiers(identifier_id); - - --- --- Name: relationships relationships_relationship_type_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.relationships - ADD CONSTRAINT relationships_relationship_type_id_fkey FOREIGN KEY (relationship_type_id) REFERENCES spdx.relationship_types(relationship_type_id); - - --- --- Name: relationships relationships_right_identifier_id_fkey; Type: FK CONSTRAINT; Schema: spdx; Owner: augur --- - -ALTER TABLE ONLY spdx.relationships - ADD CONSTRAINT relationships_right_identifier_id_fkey FOREIGN KEY (right_identifier_id) REFERENCES spdx.identifiers(identifier_id); - - --- --- PostgreSQL database dump complete --- - From d0da0318fb77d3c408c631084fe8d88fa175b8c4 Mon Sep 17 00:00:00 2001 From: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> Date: Sun, 11 Jan 2026 15:41:45 -0500 Subject: [PATCH 099/104] add timeout value for the job Signed-off-by: Adrian Edwards <17362949+MoralCode@users.noreply.github.com> --- .github/workflows/functional_test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/functional_test.yml b/.github/workflows/functional_test.yml index 544029df0e..eaa50adf30 100644 --- a/.github/workflows/functional_test.yml +++ b/.github/workflows/functional_test.yml @@ -11,6 +11,7 @@ jobs: test: name: test with ${{ matrix.env }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} + timeout-minutes: 15 strategy: fail-fast: false matrix: From c43b2f39a4ae3ab68c0ee2925024eff5e8b26814 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Tue, 2 Dec 2025 11:01:32 -0500 Subject: [PATCH 100/104] First draft of new database 
 table for repo_aliases

Signed-off-by: Adrian Edwards
---
 augur/application/db/models/__init__.py | 1 +
 augur/application/db/models/augur_data.py | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/augur/application/db/models/__init__.py b/augur/application/db/models/__init__.py
index f729f0ac1e..06ca9cb910 100644
--- a/augur/application/db/models/__init__.py
+++ b/augur/application/db/models/__init__.py
@@ -14,6 +14,7 @@
     ContributorRepo,
     ContributorsAlias,
     Repo,
+    HistoricalRepoURLs,
     RepoTestCoverage,
     RepoGroupInsight,
     RepoGroupsListServe,
diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py
index 9f7d8c7fb3..78c2ce7151 100644
--- a/augur/application/db/models/augur_data.py
+++ b/augur/application/db/models/augur_data.py
@@ -5,6 +5,7 @@
     CHAR,
     Column,
     Date,
+    DateTime,
     Float,
     ForeignKey,
     Index,
@@ -17,6 +18,7 @@
     Text,
     UniqueConstraint,
     text,
+    func
 )
 from sqlalchemy.dialects.postgresql import JSONB, TIMESTAMP, UUID
 from sqlalchemy.orm import relationship
@@ -1166,6 +1168,23 @@ def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_
 
+class HistoricalRepoURLs(Base):
+    """ A table for storing previously-used git URLs for a repository
+    This is used to enable lookups that resolve historical URLs to the repo_id for a given repository
+    When a repo is detected as moved and its url is updated in the repo table, the old URL gets added to this table.
+
+    The date_collected field allows for history of a repo with multiple URL changes to be inferred,
+    for example, when an old url is moved to this table, its date serves as both an end date
+    for the previous old url, and as the start date for the one that was just moved.
+    The currently-valid URL remains in the repo table and is not moved here until it has been superseded.
+    """
+
+    __tablename__ = "historical_repo_urls"
+    __table_args__ = {"schema": "augur_data"}
+
+    repo_id = Column(ForeignKey("augur_data.repo.repo_id"), primary_key=True)
+    git_url = Column(String, primary_key=True)
+    date_collected = Column(DateTime(timezone=True), server_default=func.now(), nullable=True)
+
 
 class RepoTestCoverage(Base):
     __tablename__ = "repo_test_coverage"

From 4c9244a3859bfe03dff3f80c74ad33eebfb5819d Mon Sep 17 00:00:00 2001
From: Adrian Edwards
Date: Tue, 2 Dec 2025 14:11:56 -0500
Subject: [PATCH 101/104] add code in update_repo_with_dict that adds values to the new repo_aliases table

Signed-off-by: Adrian Edwards
---
 augur/tasks/github/detect_move/core.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py
index 6b47df1a32..2ad96de671 100644
--- a/augur/tasks/github/detect_move/core.py
+++ b/augur/tasks/github/detect_move/core.py
@@ -7,6 +7,8 @@
 from augur.tasks.util.collection_state import CollectionState
 from augur.application.db.util import execute_session_query
 from augur.application.db.lib import bulk_insert_dicts
+from augur.application.db.models import HistoricalRepoURLs
+from sqlalchemy.exc import IntegrityError
 
 
 class RepoMovedException(Exception):
@@ -29,12 +31,24 @@ def update_repo_with_dict(repo,new_dict,logger):
     """
     to_insert = dict(repo.__dict__)
    del to_insert['_sa_instance_state']
+
+    old_url = to_insert["repo_git"]
+    repo_id = to_insert["repo_id"]
+
+    with DatabaseSession(logger) as session:
+        previous_alias = HistoricalRepoURLs(repo_id=repo_id, git_url=old_url)
+        try:
+            result = session.add(previous_alias)
+            session.commit()
+        except IntegrityError as e: #Unique violation
+            session.rollback()
+
     to_insert.update(new_dict)
 
     result = bulk_insert_dicts(logger, to_insert, Repo, ['repo_id'])
 
     url = to_insert['repo_git']
-    logger.info(f"Updated repo for {url}\n")
+    logger.info(f"Updated repo {old_url} to {url} and set alias\n")

From 1e7a26d8eff053c6ff8b90c574998022bdf596ee Mon Sep 17 00:00:00 2001
From: Adrian Edwards
Date: Mon, 15 Dec 2025 16:49:35 -0500
Subject: [PATCH 102/104] seems like retry needs a value passed into it.

Signed-off-by: Adrian Edwards
---
 augur/tasks/github/detect_move/core.py | 9 ++++++---
 augur/tasks/github/detect_move/tasks.py | 5 ++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/augur/tasks/github/detect_move/core.py b/augur/tasks/github/detect_move/core.py
index 2ad96de671..a3eb3803d6 100644
--- a/augur/tasks/github/detect_move/core.py
+++ b/augur/tasks/github/detect_move/core.py
@@ -12,7 +12,9 @@
 
 class RepoMovedException(Exception):
-    pass
+    def __init__(self, message, new_url=None):
+        super().__init__(message)
+        self.new_url = new_url
 
 
 class RepoGoneException(Exception):
     pass
@@ -49,6 +51,7 @@ def update_repo_with_dict(repo,new_dict,logger):
 
     url = to_insert['repo_git']
     logger.info(f"Updated repo {old_url} to {url} and set alias\n")
+    return url
 
 
@@ -104,9 +107,9 @@ def ping_github_for_repo_move(session, key_auth, repo, logger,collection_hook='c
             'description': f"(Originally hosted at {url}) {old_description}"
         }
 
-        update_repo_with_dict(repo, repo_update_dict, logger)
+        new_url = update_repo_with_dict(repo, repo_update_dict, logger)
 
-        raise RepoMovedException("ERROR: Repo has moved! Resetting Collection!")
+        raise RepoMovedException("ERROR: Repo has moved! Resetting Collection!", new_url=new_url)
 
     #Mark as ignore if 404
     if response_from_gh.status_code == 404:
diff --git a/augur/tasks/github/detect_move/tasks.py b/augur/tasks/github/detect_move/tasks.py
index 6f7b04b8de..249ff1a0de 100644
--- a/augur/tasks/github/detect_move/tasks.py
+++ b/augur/tasks/github/detect_move/tasks.py
@@ -29,7 +29,10 @@ def detect_github_repo_move_core(repo_git : str) -> None:
         try:
             ping_github_for_repo_move(session, key_auth, repo, logger)
         except RepoMovedException as e:
-            raise Retry(e)
+            if e.new_url is not None:
+                raise Retry(e.new_url)
+            else:
+                raise Reject(e)
         except RepoGoneException as e:
             raise Reject(e)

From e05bcce8d47625c1a882b1943ded62c739e9a31f Mon Sep 17 00:00:00 2001
From: Adrian Edwards
Date: Mon, 15 Dec 2025 15:53:57 -0500
Subject: [PATCH 103/104] Add migration for new table

Signed-off-by: Adrian Edwards
---
 .../38_add_historical_repo_urls_table.py | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 augur/application/schema/alembic/versions/38_add_historical_repo_urls_table.py

diff --git a/augur/application/schema/alembic/versions/38_add_historical_repo_urls_table.py b/augur/application/schema/alembic/versions/38_add_historical_repo_urls_table.py
new file mode 100644
index 0000000000..dda3c17188
--- /dev/null
+++ b/augur/application/schema/alembic/versions/38_add_historical_repo_urls_table.py
@@ -0,0 +1,35 @@
+"""add historical repo urls table
+
+Revision ID: 38
+Revises: 37
+Create Date: 2025-12-15 15:50:31.819780
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '38'
+down_revision = '37'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('historical_repo_urls',
+    sa.Column('repo_id', sa.BigInteger(), nullable=False),
+    sa.Column('git_url', sa.String(), nullable=False),
+    sa.Column('date_collected', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
+    sa.ForeignKeyConstraint(['repo_id'], ['augur_data.repo.repo_id'], ),
+    sa.PrimaryKeyConstraint('repo_id', 'git_url'),
+    schema='augur_data'
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('historical_repo_urls', schema='augur_data')
+    # ### end Alembic commands ###

From 0592017b6fa26ef46ffb678cf0eeccd777f63d11 Mon Sep 17 00:00:00 2001
From: "Sean P. Goggins"
Date: Tue, 20 Jan 2026 18:19:54 -0600
Subject: [PATCH 104/104] updated metadata

Signed-off-by: Sean P. Goggins
---
 README.md | 6 +++---
 metadata.py | 8 +++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index e59180de0c..0a0ab7deab 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Augur NEW Release v0.91.0
+# Augur NEW Release v0.92.0
 
 Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data - less data carpentry for everyone else! The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot), a public instance of 8Knot is available [here](https://metrix.chaoss.io) - this is tied to a public instance of [Augur](https://ai.chaoss.io).
 
 We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy o
 
 ## NEW RELEASE ALERT!
 
 **If you want to jump right in, the updated docker, docker-compose and bare metal installation instructions are available [here](docs/new-install.md)**.
 
-Augur is now releasing a dramatically improved new version. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.91.0).
+Augur is now releasing a dramatically improved new version. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.92.0).
 
 - The `release` branch is a stable version of our new architecture, which features:
@@ -83,7 +83,7 @@ We strongly believe that much of what makes open source so great is the incredib
 
 ## License, Copyright, and Funding
 
-Copyright © 2025 University of Nebraska at Omaha, University of Missouri, Brian Warner, and the CHAOSS Project.
+Copyright © 2025 University of Missouri, Sean Goggins, and Derek Howard.
 
 Augur is free software: you can redistribute it and/or modify it under the terms of the MIT License as published by the Open Source Initiative. See the [LICENSE](LICENSE) file for more details.
 
diff --git a/metadata.py b/metadata.py
index cf6893b2f8..71827630af 100644
--- a/metadata.py
+++ b/metadata.py
@@ -5,8 +5,10 @@
 __short_description__ = "Python 3 package for free/libre and open-source software community metrics, models & data collection"
 
-__version__ = "0.91.0"
-__release__ = "v0.91.0 (What's Up Augur? We are Software!)"
+__version__ = "0.92.0"
+__release__ = "v0.92.0 (Paladin Penguin)"
+__author__ = "Augur Team"
+__author_email__ = "outdoors@acm.org"
 __license__ = "MIT"
-__copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Derek Howard, Sean Goggins, Brian Warner & Augurlabs 2025, Red Hat Software"
+__copyright__ = "University of Missouri, Derek Howard, Sean Goggins, Augurlabs 2025"
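Illustrative sketch (not part of the patch series above): the HistoricalRepoURLs docstring in PATCH 100 describes lookups that resolve a previously-used git URL back to its repo_id. A minimal version of such a lookup might look like the following, assuming the existing Repo model and the DatabaseSession helper seen in PATCH 101 are importable; the DatabaseSession import path and the resolve_repo_id helper name are assumptions, not code from these patches.

from augur.application.db.models import Repo, HistoricalRepoURLs
from augur.application.db.session import DatabaseSession  # assumed import path

def resolve_repo_id(logger, git_url: str):
    """Map a git URL to a repo_id: check the current URL first, then historical ones."""
    with DatabaseSession(logger) as session:
        # The repo table always holds the currently-valid repo_git.
        repo = session.query(Repo).filter(Repo.repo_git == git_url).first()
        if repo is not None:
            return repo.repo_id

        # detect_move inserts a superseded URL here when a repo is detected as moved.
        alias = (
            session.query(HistoricalRepoURLs)
            .filter(HistoricalRepoURLs.git_url == git_url)
            .first()
        )
        return alias.repo_id if alias is not None else None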
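A second hedged sketch: the same docstring notes that date_collected lets the URL history of a repo with multiple moves be inferred, since each alias row's timestamp is both the end date of that URL and the start date of its successor. Reusing the imports above, a reconstruction of that timeline could look like this (url_history is likewise a hypothetical helper, not part of the patches):

def url_history(logger, repo_id: int):
    """Return (git_url, end_date) pairs oldest-first; the current URL has no end date."""
    with DatabaseSession(logger) as session:
        aliases = (
            session.query(HistoricalRepoURLs)
            .filter(HistoricalRepoURLs.repo_id == repo_id)
            .order_by(HistoricalRepoURLs.date_collected)
            .all()
        )
        # Each date_collected marks when that URL stopped being current, which is also
        # when the next URL in the sequence took effect.
        history = [(alias.git_url, alias.date_collected) for alias in aliases]

        repo = session.query(Repo).filter(Repo.repo_id == repo_id).first()
        if repo is not None:
            history.append((repo.repo_git, None))  # still current, no end date yet
        return history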