From 2e923f5cf07b52b83fdced6cdf2e7029f659faf7 Mon Sep 17 00:00:00 2001 From: Cali Dolfi <47504866+cdolfi@users.noreply.github.com> Date: Mon, 2 Jun 2025 11:03:31 -0400 Subject: [PATCH 01/31] line missed in bug fix to allow non root user Signed-off-by: Cali Dolfi --- docker/backend/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 38e7ca0b1c..34ef8ed788 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -113,5 +113,6 @@ RUN ln -s /cache /augur/augur/static/cache COPY --chmod=u=rwx,go=rx ./docker/backend/entrypoint.sh / COPY --chmod=u=rwx,go=rx ./docker/backend/init.sh / +RUN chmod +x /entrypoint.sh /init.sh ENTRYPOINT ["/bin/bash", "/entrypoint.sh"] CMD ["/init.sh"] From d000e490a357d8b9f63238f99932d1728b355919 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 3 Jun 2025 18:23:15 -0500 Subject: [PATCH 02/31] remove generic exception handling and make missing pr and issue logs info since thy are expected --- augur/tasks/github/events.py | 4 ++-- augur/tasks/github/messages.py | 8 +++----- augur/tasks/github/pull_requests/commits_model/core.py | 6 ++---- augur/tasks/github/pull_requests/files_model/core.py | 10 ++++++++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/augur/tasks/github/events.py b/augur/tasks/github/events.py index 08efb35d92..38a5e9e9c6 100644 --- a/augur/tasks/github/events.py +++ b/augur/tasks/github/events.py @@ -325,7 +325,7 @@ def _collect_and_process_issue_events(self, owner, repo, repo_id, key_auth, sinc self._tool_source, self._tool_version, self._data_source) ) except UrlNotFoundException as e: - self._logger.warning(f"{self.repo_identifier}: Url not found for {event_url}") + self._logger.info(f"{self.repo_identifier}: Issue with number of {issue_number} returned 404 on event data. Skipping.") if len(events) > 500: self._insert_contributors(contributors) @@ -386,7 +386,7 @@ def _collect_and_process_pr_events(self, owner, repo, repo_id, key_auth, since): self._tool_source, self._tool_version, self._data_source) ) except UrlNotFoundException: - self._logger.warning(f"{self.repo_identifier}: Url not found for {event_url}") + self._logger.info(f"{self.repo_identifier}: PR with number of {pr_number} returned 404 on event data. Skipping.") continue if len(events) > 500: diff --git a/augur/tasks/github/messages.py b/augur/tasks/github/messages.py index 40b35e942d..812af0fada 100644 --- a/augur/tasks/github/messages.py +++ b/augur/tasks/github/messages.py @@ -120,12 +120,10 @@ def process_large_issue_and_pr_message_collection(repo_id, repo_git: str, logger try: messages = list(github_data_access.paginate_resource(comment_url)) all_data += messages - except UrlNotFoundException as e: - logger.warning(f"{task_name}: 404 on comment URL {comment_url}. Skipping. Reason: {e}") + except UrlNotFoundException: + logger.info(f"{task_name}: PR or issue comment url of {comment_url} returned 404. Skipping.") skipped_urls += 1 - except Exception as e: - logger.error(f"{task_name}: Unexpected error on comment URL {comment_url}: {e}", exc_info=True) - + if len(all_data) >= 20: process_messages(all_data, task_name, repo_id, logger, augur_db) all_data.clear() diff --git a/augur/tasks/github/pull_requests/commits_model/core.py b/augur/tasks/github/pull_requests/commits_model/core.py index 3867f94733..2df6d66f5d 100644 --- a/augur/tasks/github/pull_requests/commits_model/core.py +++ b/augur/tasks/github/pull_requests/commits_model/core.py @@ -71,10 +71,8 @@ def pull_request_commits_model(repo_id,logger, augur_db, key_auth, full_collecti } all_data.append(pr_commit_row) except UrlNotFoundException: - logger.warning(f"{task_name}: PR #{index + 1} returned 404 on commit data. Skipping.") - except Exception as e: - logger.error(f"{task_name}: Unexpected error while processing PR #{index + 1}: {e}", exc_info=True) - + logger.info(f"{task_name}: PR with url of {pr_info['pr_url']} returned 404 on commit data. Skipping.") + continue if len(all_data) > 0: logger.info(f"{task_name}: Inserting {len(all_data)} rows") diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index badc86cd38..e3c594c08f 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -36,6 +36,8 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection repo = execute_session_query(query, 'one') owner, name = get_owner_repo(repo.repo_git) + task_name = f"{owner}/{name} Pr files" + github_graphql_data_access = GithubGraphQlDataAccess(key_auth, logger) pr_file_rows = [] @@ -90,10 +92,14 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection } pr_file_rows.append(data) - except (NotFoundException, InvalidDataException) as e: - logger.warning(e) + except NotFoundException as e: + logger.info(f"{task_name}: PR with url of {pr_info['pr_url']} returned 404 on file data. Skipping.") + continue + except InvalidDataException as e: + logger.warning(f"{task_name}: PR with url of {pr_info['pr_url']} returned null for file data. Skipping.") continue + if len(pr_file_rows) > 0: # Execute a bulk upsert with sqlalchemy pr_file_natural_keys = ["pull_request_id", "repo_id", "pr_file_path"] From 5bab21370201fd80787d314e88ab7da579e17ea3 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 3 Jun 2025 18:24:50 -0500 Subject: [PATCH 03/31] remove logic that ignored 404s in github data access --- augur/tasks/github/util/github_data_access.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/augur/tasks/github/util/github_data_access.py b/augur/tasks/github/util/github_data_access.py index 890165a8db..3e8a8accc6 100644 --- a/augur/tasks/github/util/github_data_access.py +++ b/augur/tasks/github/util/github_data_access.py @@ -119,18 +119,7 @@ def make_request(self, url, method="GET", timeout=100): # This is not an issue that is really an Exception. It is more of a nominal signal. if response.status_code == 404: - parsed = urlparse(url) - path = parsed.path.lower() - query = parsed.query.lower() - - if any(k in path for k in ["commits", "files", "comments"]) or \ - any(k in query for k in ["commits", "files", "comments"]): - self.logger.warning( - f"Github response with 404 for PR files, PR commits or messages. " - f"This is a data anomaly in the platform API, not an error. URL: {url}. Response: {response.text}" - ) - else: - raise UrlNotFoundException(f"Could not find {url}") + raise UrlNotFoundException(f"Could not find {url}") if response.status_code == 401: raise NotAuthorizedException(f"Could not authorize with the github api") From f7e16004d8a07ebd1167c7c94da965540a1d9cb1 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Tue, 3 Jun 2025 18:46:34 -0500 Subject: [PATCH 04/31] simplify is_forked and is_archived logic --- augur/tasks/github/repo_info/core.py | 78 +++++++++++----------------- 1 file changed, 29 insertions(+), 49 deletions(-) diff --git a/augur/tasks/github/repo_info/core.py b/augur/tasks/github/repo_info/core.py index e782fde5a5..57cd970bc0 100644 --- a/augur/tasks/github/repo_info/core.py +++ b/augur/tasks/github/repo_info/core.py @@ -1,7 +1,7 @@ #SPDX-License-Identifier: MIT import json import sqlalchemy as s -from augur.tasks.github.util.github_data_access import GithubDataAccess +from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException from augur.tasks.github.util.github_graphql_data_access import GithubGraphQlDataAccess from augur.tasks.github.util.github_paginator import hit_api from augur.tasks.github.util.util import get_owner_repo @@ -55,60 +55,38 @@ def get_repo_data(logger, url, response): return data """ -def get_repo_data(logger, url, response): - if response is None: - logger.error(f"Failed to retrieve data from {url}. Response is None.") - raise Exception(f"Failed API request to {url}. Got None response.") - data = {} +def get_repo_data(logger, owner, repo): + try: - data = response.json() + url = f'https://api.github.com/repos/{owner}/{repo}' + github_data_access = GithubDataAccess(None, logger) + result = github_data_access.get_resource(url) + return result + except UrlNotFoundException as e: + message = f"GitHub repo was not found or does not exist for endpoint: {url}" + logger.error(message) + raise Exception(message) from e except Exception as e: - logger.warning(f"Failed to parse JSON from {url}: {e}") - try: - data = json.loads(json.dumps(response.text)) # This is effectively a no-op - except Exception as inner_e: - logger.error(f"Completely failed to parse response from {url}: {inner_e}") - raise Exception(f"Unparseable response from {url}") - - if 'errors' in data: - logger.error(f"GitHub API returned errors: {data['errors']}") - raise Exception(f"GitHub returned error response! {data['errors']}") - - if 'id' not in data and 'message' in data: - logger.warning(f"Unexpected response structure from {url}: {data}") - if data['message'] == 'Not Found': - raise Exception(f"GitHub repo was not found or does not exist for endpoint: {url}") - - return data - -def is_forked(key_auth, logger, owner, repo): #/repos/:owner/:repo parent - logger.info('Querying parent info to verify if the repo is forked\n') - url = f'https://api.github.com/repos/{owner}/{repo}' - - r = hit_api(key_auth, url, logger)#requests.get(url, headers=self.headers) + logger.error(e) + raise e - data = get_repo_data(logger, url, r) - - if 'fork' in data: - if 'parent' in data: - return data['parent']['full_name'] +def is_forked(logger, repo_data): #/repos/:owner/:repo parent + logger.info('Determining if the repo is forked\n') + + if 'fork' in repo_data: + if 'parent' in repo_data: + return repo_data['parent']['full_name'] return 'Parent not available' return False -def is_archived(key_auth, logger, owner, repo): - logger.info('Querying committers count\n') - url = f'https://api.github.com/repos/{owner}/{repo}' - - r = hit_api(key_auth, url, logger)#requests.get(url, headers=self.headers) - #self.update_gh_rate_limit(r) +def is_archived(logger, repo_data): + logger.info('Determining if the repo is archived\n') - data = get_repo_data(logger, url, r) - - if 'archived' in data: - if data['archived']: - if 'updated_at' in data: - return data['updated_at'] + if 'archived' in repo_data: + if repo_data['archived']: + if 'updated_at' in repo_data: + return repo_data['updated_at'] return 'Date not available' return False @@ -267,8 +245,10 @@ def repo_info_model(key_auth, repo_orm_obj, logger): execute_sql(insert_statement) # Note that the addition of information about where a repository may be forked from, and whether a repository is archived, updates the `repo` table, not the `repo_info` table. - forked = is_forked(key_auth, logger, owner, repo) - archived = is_archived(key_auth, logger, owner, repo) + repo_data = get_repo_data(logger, owner, repo) + + forked = is_forked(logger, repo_data) + archived = is_archived(logger, repo_data) archived_date_collected = None if archived is not False: archived_date_collected = archived From 78f6b0a2bc447f3ed3eaf2ad63daa47ce97af498 Mon Sep 17 00:00:00 2001 From: Cali Dolfi <47504866+cdolfi@users.noreply.github.com> Date: Fri, 6 Jun 2025 18:07:37 -0400 Subject: [PATCH 05/31] add refresh for explorer_repo_languages Signed-off-by: Cali Dolfi --- augur/tasks/db/refresh_materialized_views.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/augur/tasks/db/refresh_materialized_views.py b/augur/tasks/db/refresh_materialized_views.py index 8a06ac7a61..37e3ef561a 100644 --- a/augur/tasks/db/refresh_materialized_views.py +++ b/augur/tasks/db/refresh_materialized_views.py @@ -88,6 +88,12 @@ def refresh_materialized_views(self): COMMIT; """) + mv14_refresh = s.sql.text(""" + + REFRESH MATERIALIZED VIEW concurrently augur_data.explorer_repo_languages with data; + COMMIT; + """) + try: execute_sql(mv1_refresh) except Exception as e: @@ -166,6 +172,12 @@ def refresh_materialized_views(self): logger.info(f"error is {e}") pass + try: + execute_sql(mv14_refresh) + except Exception as e: + logger.info(f"error is {e}") + pass + #Now refresh facade tables #Use this class to get all the settings and #utility functions for facade From 68de2d448c53172870c0c34d801fc9afa6c597d7 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 6 Jun 2025 17:27:31 -0500 Subject: [PATCH 06/31] updated Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 9262f241b4..06290e7b73 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -76,7 +76,7 @@ def generate_scorecard(logger, repo_git): path = repo_git[8:] if path[-4:] == '.git': path = path.replace(".git", "") - command = '--local=' + path + command = '--repo=' + path #this is path where our scorecard project is located path_to_scorecard = os.getenv('SCORECARD_DIR', os.environ['HOME'] + '/scorecard') From acebc89b177a4dc8576299fec351c1d05746b940 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 6 Jun 2025 20:31:37 -0500 Subject: [PATCH 07/31] updating OpenSSF Scorecard insert Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 06290e7b73..64a856326a 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -132,7 +132,7 @@ def generate_scorecard(logger, repo_git): } to_insert.append(repo_deps_scorecard) - bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["repo_id","name"]) + bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["data_collection_date","repo_id","name"]) logger.info(f"Done generating scorecard for repo {repo_id} from path {path}") From c81bf947247c4de9cb618de472a6629d3e2af5ef Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 6 Jun 2025 20:32:46 -0500 Subject: [PATCH 08/31] task collection id update Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 64a856326a..99806f1517 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -39,7 +39,7 @@ def generate_deps_data(logger, repo_git): 'dep_count' : dep.count, 'dep_language' : dep.language, 'tool_source': 'deps_model', - 'tool_version': '0.43.9', + 'tool_version': '0.89.1', 'data_source': 'Git', 'data_collection_date': scan_date } From a67bce2abdebe28e58e9ce8ecdd63c6f3624ec5b Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 6 Jun 2025 20:46:05 -0500 Subject: [PATCH 09/31] alembic upgrade Signed-off-by: Sean P. Goggins --- .../versions/32_update_openssf_deps.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 augur/application/schema/alembic/versions/32_update_openssf_deps.py diff --git a/augur/application/schema/alembic/versions/32_update_openssf_deps.py b/augur/application/schema/alembic/versions/32_update_openssf_deps.py new file mode 100644 index 0000000000..60172fd6c1 --- /dev/null +++ b/augur/application/schema/alembic/versions/32_update_openssf_deps.py @@ -0,0 +1,35 @@ +"""Update pr events unique + +Revision ID: 31 +Revises: 30 +Create Date: 2025-03-08 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy import text +from augur.application.db import create_database_engine, get_database_string + + +# revision identifiers, used by Alembic. +revision = '32' +down_revision = '31' +branch_labels = None +depends_on = None +def upgrade(): + op.drop_constraint( + 'deps-scorecard-insert-unique', + 'repo_deps_scorecard', + schema='augur_data', + type_='unique' + ) + +def downgrade(): + # You would need to know the columns involved in the original unique constraint to recreate it. + # Example below assumes the columns were ('repo_id', 'scorecard_id') + op.create_unique_constraint( + 'deps-scorecard-insert-unique', + 'repo_deps_scorecard', + ['repo_id', 'scorecard_id'], + schema='augur_data' + ) \ No newline at end of file From e49c373034a1072ef760af109756aa1e6185a8db Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 6 Jun 2025 20:46:54 -0500 Subject: [PATCH 10/31] alembic finalization Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/32_update_openssf_deps.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/augur/application/schema/alembic/versions/32_update_openssf_deps.py b/augur/application/schema/alembic/versions/32_update_openssf_deps.py index 60172fd6c1..f3fa6283be 100644 --- a/augur/application/schema/alembic/versions/32_update_openssf_deps.py +++ b/augur/application/schema/alembic/versions/32_update_openssf_deps.py @@ -1,8 +1,8 @@ """Update pr events unique -Revision ID: 31 -Revises: 30 -Create Date: 2025-03-08 +Revision ID: 32 +Revises: 31 +Create Date: 2025-06-06 """ from alembic import op From 9c11f636086fbb8e1b9b70e5a2682a5fc61466f5 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 6 Jun 2025 20:48:43 -0500 Subject: [PATCH 11/31] updating upgrade text message Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/32_update_openssf_deps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/application/schema/alembic/versions/32_update_openssf_deps.py b/augur/application/schema/alembic/versions/32_update_openssf_deps.py index f3fa6283be..1230e3ed83 100644 --- a/augur/application/schema/alembic/versions/32_update_openssf_deps.py +++ b/augur/application/schema/alembic/versions/32_update_openssf_deps.py @@ -1,4 +1,4 @@ -"""Update pr events unique +"""Remove unecessary OpenSSF Scorecard Unique Constraint Revision ID: 32 Revises: 31 From f9cb94490b8001a829779137b6e39600b2fc34f5 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 6 Jun 2025 20:55:30 -0500 Subject: [PATCH 12/31] updated alembic Signed-off-by: Sean P. Goggins --- .../alembic/versions/32_update_openssf_deps.py | 14 ++++++++++++-- augur/tasks/git/dependency_tasks/core.py | 4 ++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/augur/application/schema/alembic/versions/32_update_openssf_deps.py b/augur/application/schema/alembic/versions/32_update_openssf_deps.py index 1230e3ed83..ba7fe74056 100644 --- a/augur/application/schema/alembic/versions/32_update_openssf_deps.py +++ b/augur/application/schema/alembic/versions/32_update_openssf_deps.py @@ -23,10 +23,20 @@ def upgrade(): schema='augur_data', type_='unique' ) + op.create_unique_constraint( + 'deps_scorecard_new_unique', + 'repo_deps_scorecard', + ['repo_id', 'scorecard_id'], + schema='augur_data' + ) def downgrade(): - # You would need to know the columns involved in the original unique constraint to recreate it. - # Example below assumes the columns were ('repo_id', 'scorecard_id') + op.drop_constraint( + 'deps_scorecard_new_unique', + 'repo_deps_scorecard', + schema='augur_data', + type_='unique' + ) op.create_unique_constraint( 'deps-scorecard-insert-unique', 'repo_deps_scorecard', diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 99806f1517..75c9ee6eb4 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -111,7 +111,7 @@ def generate_scorecard(logger, repo_git): 'scorecard_check_details': required_output['repo'], 'score': required_output['score'], 'tool_source': 'scorecard_model', - 'tool_version': '0.43.9', + 'tool_version': '0.89.1', 'data_source': 'Git', 'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') } @@ -126,7 +126,7 @@ def generate_scorecard(logger, repo_git): 'scorecard_check_details': check, 'score': check['score'], 'tool_source': 'scorecard_model', - 'tool_version': '0.43.9', + 'tool_version': '0.89.1', 'data_source': 'Git', 'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') } From cb6abc656ca5da89edc5176589638fbe9121feed Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 6 Jun 2025 21:00:33 -0500 Subject: [PATCH 13/31] udpated alembic Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/32_update_openssf_deps.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/application/schema/alembic/versions/32_update_openssf_deps.py b/augur/application/schema/alembic/versions/32_update_openssf_deps.py index ba7fe74056..7d426e582c 100644 --- a/augur/application/schema/alembic/versions/32_update_openssf_deps.py +++ b/augur/application/schema/alembic/versions/32_update_openssf_deps.py @@ -26,7 +26,7 @@ def upgrade(): op.create_unique_constraint( 'deps_scorecard_new_unique', 'repo_deps_scorecard', - ['repo_id', 'scorecard_id'], + ['repo_id', 'repo_deps_scorecard_id'], schema='augur_data' ) @@ -40,6 +40,6 @@ def downgrade(): op.create_unique_constraint( 'deps-scorecard-insert-unique', 'repo_deps_scorecard', - ['repo_id', 'scorecard_id'], + ['repo_id', 'name'], schema='augur_data' ) \ No newline at end of file From 032a62a43fe52f146bd59228ba25d13f4659b05d Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Fri, 6 Jun 2025 21:05:37 -0500 Subject: [PATCH 14/31] aligning code with db Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 75c9ee6eb4..515910b986 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -46,7 +46,7 @@ def generate_deps_data(logger, repo_git): to_insert.append(repo_deps) - bulk_insert_dicts(logger, to_insert,RepoDependency,["repo_id","dep_name","data_collection_date"]) + bulk_insert_dicts(logger, to_insert,RepoDependency,["repo_id","repo_deps_scorecard_id"]) logger.info(f"Inserted {len(deps)} dependencies for repo {repo_id}") From 98079ded5cc1266e93b8a87562b8c44e6239e706 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 7 Jun 2025 12:36:38 -0500 Subject: [PATCH 15/31] update test Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 515910b986..ec98030b27 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -132,7 +132,7 @@ def generate_scorecard(logger, repo_git): } to_insert.append(repo_deps_scorecard) - bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["data_collection_date","repo_id","name"]) + bulk_insert_dicts(logger, to_insert, RepoDepsScorecard) logger.info(f"Done generating scorecard for repo {repo_id} from path {path}") From d335f4e90b1ce27a65b7a4ae16d3e4d198e88114 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 7 Jun 2025 12:40:48 -0500 Subject: [PATCH 16/31] another try Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index ec98030b27..1666bf70ca 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -132,7 +132,8 @@ def generate_scorecard(logger, repo_git): } to_insert.append(repo_deps_scorecard) - bulk_insert_dicts(logger, to_insert, RepoDepsScorecard) + bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["rep_deps_scorecard_id"]) + logger.info(f"Inserted {len(to_insert)} scorecard entries for repo {repo_id}") logger.info(f"Done generating scorecard for repo {repo_id} from path {path}") From c8ee1561cbc7c749e3187a4e6d7f8ae5258a240f Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 7 Jun 2025 12:49:26 -0500 Subject: [PATCH 17/31] typo Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 1666bf70ca..9169a8053a 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -132,7 +132,7 @@ def generate_scorecard(logger, repo_git): } to_insert.append(repo_deps_scorecard) - bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["rep_deps_scorecard_id"]) + bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["repo_deps_scorecard_id"]) logger.info(f"Inserted {len(to_insert)} scorecard entries for repo {repo_id}") logger.info(f"Done generating scorecard for repo {repo_id} from path {path}") From b26fb371a8dd6b1649c36b560c53aa7e04d3b99b Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 7 Jun 2025 18:26:25 -0500 Subject: [PATCH 18/31] OpenSSF Scorecard double fixer Signed-off-by: Sean P. Goggins --- .../versions/33_update_openssf_deps.py | 45 +++++++++++++++++++ augur/tasks/git/dependency_tasks/core.py | 7 +-- 2 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 augur/application/schema/alembic/versions/33_update_openssf_deps.py diff --git a/augur/application/schema/alembic/versions/33_update_openssf_deps.py b/augur/application/schema/alembic/versions/33_update_openssf_deps.py new file mode 100644 index 0000000000..7593f80696 --- /dev/null +++ b/augur/application/schema/alembic/versions/33_update_openssf_deps.py @@ -0,0 +1,45 @@ +"""Remove unecessary OpenSSF Scorecard Unique Constraint + +Revision ID: 33 +Revises: 32 +Create Date: 2025-06-06 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy import text +from augur.application.db import create_database_engine, get_database_string + + +# revision identifiers, used by Alembic. +revision = '33' +down_revision = '32' +branch_labels = None +depends_on = None +def upgrade(): + op.drop_constraint( + 'deps_scorecard_new_unique', + 'repo_deps_scorecard', + schema='augur_data', + type_='unique' + ) + op.create_unique_constraint( + 'deps_scorecard_new_unique', + 'repo_deps_scorecard', + ['repo_id', 'name', 'data_collection_date'], + schema='augur_data' + ) + +def downgrade(): + op.drop_constraint( + 'deps_scorecard_new_unique', + 'repo_deps_scorecard', + schema='augur_data', + type_='unique' + ) + op.create_unique_constraint( + 'deps-scorecard-insert-unique', + 'repo_deps_scorecard', + ['repo_id', 'name'], + schema='augur_data' + ) \ No newline at end of file diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 9169a8053a..28d56defaf 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -46,8 +46,8 @@ def generate_deps_data(logger, repo_git): to_insert.append(repo_deps) - bulk_insert_dicts(logger, to_insert,RepoDependency,["repo_id","repo_deps_scorecard_id"]) - + bulk_insert_dicts(logger, to_insert,RepoDependency,["repo_id","dep_name","data_collection_date"]) + logger.info(f"Inserted {len(deps)} dependencies for repo {repo_id}") """ @@ -131,8 +131,9 @@ def generate_scorecard(logger, repo_git): 'data_collection_date': datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') } to_insert.append(repo_deps_scorecard) + + bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["repo_id","name","data_collection_data"]) - bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["repo_deps_scorecard_id"]) logger.info(f"Inserted {len(to_insert)} scorecard entries for repo {repo_id}") logger.info(f"Done generating scorecard for repo {repo_id} from path {path}") From e364065eb032b26cb0f7446b93ccbbd123e900d7 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 7 Jun 2025 18:31:39 -0500 Subject: [PATCH 19/31] db typo fix Signed-off-by: Sean P. Goggins --- .../schema/alembic/versions/32_update_openssf_deps.py | 2 +- .../schema/alembic/versions/33_update_openssf_deps.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/application/schema/alembic/versions/32_update_openssf_deps.py b/augur/application/schema/alembic/versions/32_update_openssf_deps.py index 7d426e582c..50343067f9 100644 --- a/augur/application/schema/alembic/versions/32_update_openssf_deps.py +++ b/augur/application/schema/alembic/versions/32_update_openssf_deps.py @@ -1,4 +1,4 @@ -"""Remove unecessary OpenSSF Scorecard Unique Constraint +"""Remove old OpenSSF Scorecard Unique Constraint; Add new one. Revision ID: 32 Revises: 31 diff --git a/augur/application/schema/alembic/versions/33_update_openssf_deps.py b/augur/application/schema/alembic/versions/33_update_openssf_deps.py index 7593f80696..c10d041a7b 100644 --- a/augur/application/schema/alembic/versions/33_update_openssf_deps.py +++ b/augur/application/schema/alembic/versions/33_update_openssf_deps.py @@ -1,4 +1,4 @@ -"""Remove unecessary OpenSSF Scorecard Unique Constraint +"""Fix OpenSSF Scorecard Unique Constraint Revision ID: 33 Revises: 32 From 6c18ed4e544455660097357dff8560e87025fcb6 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 7 Jun 2025 18:41:56 -0500 Subject: [PATCH 20/31] bug fix? Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 28d56defaf..391ab56e83 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -140,4 +140,4 @@ def generate_scorecard(logger, repo_git): except Exception as e: - raise MetadataException(e, f"required_output: {required_output}") + raise MetadataException(e, f"required_output: {required_output}; error {e}") From b4dbfea8f6b98e0abcddaf99086e99c26b861419 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 7 Jun 2025 18:46:30 -0500 Subject: [PATCH 21/31] added error messages Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 391ab56e83..6efb4718e2 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -140,4 +140,6 @@ def generate_scorecard(logger, repo_git): except Exception as e: + logger.info(f"\n\nThe ERROR: {e} \n\n") + raise MetadataException(e, f"required_output: {required_output}; error {e}") From ce0135c0fa206114cdf0ade4d06c3133853298a8 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 7 Jun 2025 18:48:18 -0500 Subject: [PATCH 22/31] typo fix 2 Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index 6efb4718e2..ffd1939dd4 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -132,7 +132,7 @@ def generate_scorecard(logger, repo_git): } to_insert.append(repo_deps_scorecard) - bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["repo_id","name","data_collection_data"]) + bulk_insert_dicts(logger, to_insert, RepoDepsScorecard, ["repo_id","name","data_collection_date"]) logger.info(f"Inserted {len(to_insert)} scorecard entries for repo {repo_id}") From 217ed9f3a22819f522d8691ff5a82b8a6c7a0952 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 7 Jun 2025 18:55:07 -0500 Subject: [PATCH 23/31] Update augur/tasks/git/dependency_tasks/core.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index ffd1939dd4..be9d75b9a2 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -140,6 +140,6 @@ def generate_scorecard(logger, repo_git): except Exception as e: - logger.info(f"\n\nThe ERROR: {e} \n\n") + logger.exception("Error generating scorecard", exc_info=e) raise MetadataException(e, f"required_output: {required_output}; error {e}") From 26e4c661976f0af35a1d077fb909f9498dbc58be Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sat, 7 Jun 2025 18:55:19 -0500 Subject: [PATCH 24/31] Update augur/tasks/git/dependency_tasks/core.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Sean P. Goggins --- augur/tasks/git/dependency_tasks/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/augur/tasks/git/dependency_tasks/core.py b/augur/tasks/git/dependency_tasks/core.py index be9d75b9a2..0f9b08d7d3 100644 --- a/augur/tasks/git/dependency_tasks/core.py +++ b/augur/tasks/git/dependency_tasks/core.py @@ -141,5 +141,4 @@ def generate_scorecard(logger, repo_git): except Exception as e: logger.exception("Error generating scorecard", exc_info=e) - raise MetadataException(e, f"required_output: {required_output}; error {e}") From a098ed66c7cb55d08d9e30f696cc07ee888b9d37 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 8 Jun 2025 10:29:15 -0500 Subject: [PATCH 25/31] handling NULL descriptions in relesaes Signed-off-by: Sean P. Goggins --- augur/tasks/github/releases/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/augur/tasks/github/releases/core.py b/augur/tasks/github/releases/core.py index 3192401ae3..ada68d1256 100644 --- a/augur/tasks/github/releases/core.py +++ b/augur/tasks/github/releases/core.py @@ -26,7 +26,7 @@ def get_release_inf(repo_id, release, tag_only): 'release_id': release['id'], 'repo_id': repo_id, 'release_name': release['name'], - 'release_description': release['description'], + 'release_description': release['description'] if release['description'] is not None else '', 'release_author': author, 'release_created_at': release['createdAt'], 'release_published_at': release['publishedAt'], From fe696f732cfe0c7e6dfe0a75747b05ee83e93afa Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 8 Jun 2025 14:04:29 -0500 Subject: [PATCH 26/31] fixing releases NULL handling Signed-off-by: Sean P. Goggins --- augur/tasks/github/releases/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/augur/tasks/github/releases/core.py b/augur/tasks/github/releases/core.py index ada68d1256..239b83dce9 100644 --- a/augur/tasks/github/releases/core.py +++ b/augur/tasks/github/releases/core.py @@ -54,6 +54,7 @@ def get_release_inf(repo_id, release, tag_only): 'release_id': release['id'], 'repo_id': repo_id, 'release_name': release['name'], + 'release_description': 'tag_only', 'release_author': author, 'release_tag_name': release['name'], 'tag_only': tag_only From 4a17efaa8c9f4391d8bd52559deca2500547b068 Mon Sep 17 00:00:00 2001 From: Andrew Brain Date: Sun, 8 Jun 2025 17:06:19 -0500 Subject: [PATCH 27/31] fix: key error --- augur/tasks/github/pull_requests/files_model/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/augur/tasks/github/pull_requests/files_model/core.py b/augur/tasks/github/pull_requests/files_model/core.py index e3c594c08f..cbecb44d6d 100644 --- a/augur/tasks/github/pull_requests/files_model/core.py +++ b/augur/tasks/github/pull_requests/files_model/core.py @@ -93,10 +93,10 @@ def pull_request_files_model(repo_id,logger, augur_db, key_auth, full_collection pr_file_rows.append(data) except NotFoundException as e: - logger.info(f"{task_name}: PR with url of {pr_info['pr_url']} returned 404 on file data. Skipping.") + logger.info(f"{task_name}: PR with number of {pr_info['pr_src_number']} returned 404 on file data. Skipping.") continue except InvalidDataException as e: - logger.warning(f"{task_name}: PR with url of {pr_info['pr_url']} returned null for file data. Skipping.") + logger.warning(f"{task_name}: PR with number of {pr_info['pr_src_number']} returned null for file data. Skipping.") continue From fe4edc769778562909e3b8c52d29d551ddf56502 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 8 Jun 2025 17:10:53 -0500 Subject: [PATCH 28/31] Update README.md Signed-off-by: Sean P. Goggins --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 230e2074f8..8e4d864510 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Augur NEW Release v0.89.0 +# Augur NEW Release v0.89.2 Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data - less data carpentry for everyone else! The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot), a public instance of 8Knot is available [here](https://metrix.chaoss.io) - this is tied to a public instance of [Augur](https://ai.chaoss.io). @@ -11,7 +11,7 @@ We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy o ## NEW RELEASE ALERT! **If you want to jump right in, the updated docker, docker-compose and bare metal installation instructions are available [here](docs/new-install.md)**. -Augur is now releasing a dramatically improved new version to the ```main``` branch. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.89.0). +Augur is now releasing a dramatically improved new version to the ```main``` branch. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.89.2). - The `main` branch is a stable version of our new architecture, which features: From 8e3fff74b0c64fc9829bccea2c4123717ea9882d Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 8 Jun 2025 17:11:20 -0500 Subject: [PATCH 29/31] Update metadata.py Signed-off-by: Sean P. Goggins --- metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata.py b/metadata.py index acd1292d10..725c0de105 100644 --- a/metadata.py +++ b/metadata.py @@ -5,8 +5,8 @@ __short_description__ = "Python 3 package for free/libre and open-source software community metrics, models & data collection" -__version__ = "0.89.0" -__release__ = "v0.89.0 (Midnight Sun)" +__version__ = "0.89.2" +__release__ = "v0.89.2 (Midnight Sun)" __license__ = "MIT" __copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Sean Goggins, Brian Warner & Augurlabs 2025" From f7679d498e6c5f372fab358ce9ed609539251ab9 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 8 Jun 2025 17:21:07 -0500 Subject: [PATCH 30/31] Update README.md Signed-off-by: Sean P. Goggins --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8e4d864510..9567757aad 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Augur NEW Release v0.89.2 +# Augur NEW Release v0.89.3 Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data - less data carpentry for everyone else! The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot), a public instance of 8Knot is available [here](https://metrix.chaoss.io) - this is tied to a public instance of [Augur](https://ai.chaoss.io). @@ -11,7 +11,7 @@ We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy o ## NEW RELEASE ALERT! **If you want to jump right in, the updated docker, docker-compose and bare metal installation instructions are available [here](docs/new-install.md)**. -Augur is now releasing a dramatically improved new version to the ```main``` branch. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.89.2). +Augur is now releasing a dramatically improved new version to the ```main``` branch. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.89.3). - The `main` branch is a stable version of our new architecture, which features: From 7f8f04928b00dc69c5b000e6ac288c063f75b3b9 Mon Sep 17 00:00:00 2001 From: "Sean P. Goggins" Date: Sun, 8 Jun 2025 17:21:28 -0500 Subject: [PATCH 31/31] Update metadata.py Signed-off-by: Sean P. Goggins --- metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata.py b/metadata.py index 725c0de105..601b018631 100644 --- a/metadata.py +++ b/metadata.py @@ -5,8 +5,8 @@ __short_description__ = "Python 3 package for free/libre and open-source software community metrics, models & data collection" -__version__ = "0.89.2" -__release__ = "v0.89.2 (Midnight Sun)" +__version__ = "0.89.3" +__release__ = "v0.89.3 (Midnight Sun)" __license__ = "MIT" __copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Sean Goggins, Brian Warner & Augurlabs 2025"