diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml
index e7fa4b262c..3a0e3f953a 100644
--- a/.github/workflows/build_docker.yml
+++ b/.github/workflows/build_docker.yml
@@ -55,9 +55,21 @@ jobs:
     name: End-to-end test (Docker)
     runs-on: ubuntu-latest
     steps:
+      - name: Remove unnecessary files from the base image
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
       - name: Checkout repository
         uses: actions/checkout@v4
 
+      - name: Extract project version
+        id: version
+        run: |
+          VERSION=$(python -c "exec(open('metadata.py').read()); print(__version__)")
+          echo "version=$VERSION" >> $GITHUB_OUTPUT
+          echo "Using version: $VERSION"
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
         id: setup-buildx
@@ -67,6 +79,7 @@ jobs:
         with:
           context: .
           file: ./docker/database/Dockerfile
+          build-args: VERSION=${{ steps.version.outputs.version }}
           platforms: linux/amd64
           tags: ghcr.io/${{ github.repository_owner }}/augur_database:test
           cache-from: type=gha,scope=container-database
@@ -78,6 +91,7 @@ jobs:
         with:
           context: .
           file: ./docker/keyman/Dockerfile
+          build-args: VERSION=${{ steps.version.outputs.version }}
           platforms: linux/amd64
           tags: ghcr.io/${{ github.repository_owner }}/augur_keyman:test
           cache-from: type=gha,scope=container-keyman
@@ -89,6 +103,7 @@ jobs:
         with:
           context: .
           file: ./docker/rabbitmq/Dockerfile
+          build-args: VERSION=${{ steps.version.outputs.version }}
           platforms: linux/amd64
           tags: ghcr.io/${{ github.repository_owner }}/augur_rabbitmq:test
           cache-from: type=gha,scope=container-rabbitmq
@@ -100,6 +115,7 @@ jobs:
         with:
          context: .
           file: ./docker/backend/Dockerfile
+          build-args: VERSION=${{ steps.version.outputs.version }}
           platforms: linux/amd64
           tags: ghcr.io/${{ github.repository_owner }}/augur_backend:test
           cache-from: type=gha,scope=container-backend
@@ -300,7 +316,6 @@ jobs:
       - database
       - keyman
       - rabbitmq
-      - empty_database
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
diff --git a/CITATION.cff b/CITATION.cff
index e26f3d8a86..01514fb22f 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -10,5 +10,5 @@ authors:
   given-names: Matt
 title: "Open Source Community Health: Analytical Metrics and Their Corresponding Narratives"
 doi: 10.1109/SoHeal52568.2021.00010
-date-released: 2021
+date-released: 2021-01-01
 url: https://www.seangoggins.net/wp-content/plugins/zotpress/lib/request/request.dl.php?api_user_id=655145&dlkey=HNG22ZSU&content_type=application/pdf
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 99d09cae9d..4df2a8dff2 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,6 +6,10 @@ If you are new to open source, we recommend GitHub's excellent guide on "[How to
 
 Before getting started, please make sure you've read the [README](README.md) to get a primer on our project. Augur's documentation can be found [here](https://oss-augur.readthedocs.io/en/main/).
 
+## Join the Community
+
+We encourage all contributors to join the [CHAOSS Slack workspace](https://chaoss.community/kb-getting-started/) and participate in the `#wg-augur-8knot` channel. This is a great place to ask questions, get help with issues, participate in discussions, and stay updated on community meetings and planning. Don't hesitate to introduce yourself and ask for help if you get stuck!
+
 ## Opening an issue
 
 If you're experiencing an issue with Augur or have a question you'd like help answering, please feel free to open an [issue](https://github.com/chaoss/augur/issues). To help us prevent duplicates, we kindly ask that you briefly search for your problem or question in our [issues](https://github.com/chaoss/augur/issues) before opening a new one.
@@ -97,6 +101,7 @@ git push origin master
 ### CHAOSS
 - [Website](https://chaoss.community/)
 - [Get Involved](https://chaoss.community/participate)
+- [Join the CHAOSS Slack](https://chaoss.community/kb-getting-started/) - Join the `#wg-augur-8knot` channel to participate in discussions, meetings, and planning
 - [Metrics](https://github.com/chaoss/metrics)
 - [Evolution Metrics Working Group](https://github.com/chaoss/wg-evolution)
 - [Common Metrics Working Group](https://github.com/chaoss/wg-common)
diff --git a/augur/api/metrics/repo_meta.py b/augur/api/metrics/repo_meta.py
index ffc8fc84ef..c39922e17b 100644
--- a/augur/api/metrics/repo_meta.py
+++ b/augur/api/metrics/repo_meta.py
@@ -1240,3 +1240,59 @@ def aggregate_summary(repo_group_id, repo_id=None, begin_date=None, end_date=Non
         results = pd.read_sql(summarySQL, conn, params={'repo_id': repo_id, 'begin_date': begin_date, 'end_date': end_date})
 
     return results
+
+@register_metric()
+def clones(repo_group_id, repo_id=None, begin_date=None, end_date=None):
+    """
+    Returns the number of repository clones (total and unique) for a given repo or repo group.
+    :param repo_group_id: The repository's repo_group_id
+    :param repo_id: The repository's repo_id, defaults to None
+    :param begin_date: Start date for filtering clone data (optional)
+    :param end_date: End date for filtering clone data (optional)
+    :return: DataFrame of clone counts (total and unique) per day
+    """
+    if not begin_date:
+        begin_date = '1970-1-1 00:00:00'
+    if not end_date:
+        end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+    if repo_id:
+        clones_sql = s.sql.text("""
+            SELECT
+                repo_id,
+                clone_data_timestamp AS date,
+                count_clones AS total_clones,
+                unique_clones
+            FROM augur_data.repo_clones_data
+            WHERE repo_id = :repo_id
+            AND clone_data_timestamp BETWEEN :begin_date AND :end_date
+            ORDER BY clone_data_timestamp
+        """)
+        with current_app.engine.connect() as conn:
+            results = pd.read_sql(clones_sql, conn, params={
+                'repo_id': repo_id,
+                'begin_date': begin_date,
+                'end_date': end_date
+            })
+        return results
+    else:
+        clones_sql = s.sql.text("""
+            SELECT
+                repo_id,
+                clone_data_timestamp AS date,
+                count_clones AS total_clones,
+                unique_clones
+            FROM augur_data.repo_clones_data
+            WHERE repo_id IN (
+                SELECT repo_id FROM augur_data.repo WHERE repo_group_id = :repo_group_id
+            )
+            AND clone_data_timestamp BETWEEN :begin_date AND :end_date
+            ORDER BY repo_id, clone_data_timestamp
+        """)
+        with current_app.engine.connect() as conn:
+            results = pd.read_sql(clones_sql, conn, params={
+                'repo_group_id': repo_group_id,
+                'begin_date': begin_date,
+                'end_date': end_date
+            })
+        return results
diff --git a/augur/api/routes/__init__.py b/augur/api/routes/__init__.py
index 03c2e2fa71..8176dad94b 100644
--- a/augur/api/routes/__init__.py
+++ b/augur/api/routes/__init__.py
@@ -4,10 +4,8 @@
 from .batch import *
 from .collection_status import *
 from .config import *
-from .contributor_reports import *
 from .manager import *
 from .nonstandard_metrics import *
-from .pull_request_reports import *
 from .user import *
 from .dei import *
 from .util import *
diff --git a/augur/api/routes/contributor_reports.py b/augur/api/routes/contributor_reports.py
deleted file mode 100644
index 6c107ed603..0000000000
--- a/augur/api/routes/contributor_reports.py
+++ /dev/null
@@ -1,1284 +0,0 @@
-import psycopg2
-import psycopg2 -import sqlalchemy as salc -import numpy as np -import warnings -import datetime -import pandas as pd -from math import pi -from flask import request, send_file, Response, current_app - -# import visualization libraries -from bokeh.io import export_png -from bokeh.embed import json_item -from bokeh.plotting import figure -from bokeh.models import Label, LabelSet, ColumnDataSource, Legend -from bokeh.palettes import Colorblind -from bokeh.layouts import gridplot -from bokeh.transform import cumsum - -from augur.api.routes import AUGUR_API_VERSION -from ..server import app - -warnings.filterwarnings('ignore') - -def quarters(month, year): - if 1 <= month <= 3: - return '01' + '/' + year - elif 4 <= month <= 6: - return '04' + '/' + year - elif 5 <= month <= 9: - return '07' + '/' + year - elif 10 <= month <= 12: - return '10' + '/' + year - -def new_contributor_data_collection(repo_id, required_contributions): - - rank_list = [] - for num in range(1, required_contributions + 1): - rank_list.append(num) - rank_tuple = tuple(rank_list) - -##### - -## Commented out due to abuse. - -##### - - - # contributor_query = salc.sql.text(f""" - - # SELECT * FROM ( - # SELECT ID AS - # cntrb_id, - # A.created_at AS created_at, - # date_part('month', A.created_at::DATE) AS month, - # date_part('year', A.created_at::DATE) AS year, - # A.repo_id, - # repo_name, - # full_name, - # login, - # ACTION, - # rank() OVER ( - # PARTITION BY id - # ORDER BY A.created_at ASC - # ) - # FROM - # ( - # ( - # SELECT - # canonical_id AS ID, - # created_at AS created_at, - # repo_id, - # 'issue_opened' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # augur_data.issues - # LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = issues.reporter_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, - # cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # repo_id = {repo_id} - # AND pull_request IS NULL - # GROUP BY - # canonical_id, - # repo_id, - # issues.created_at, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # canonical_id AS ID, - # TO_TIMESTAMP( cmt_author_date, 'YYYY-MM-DD' ) AS created_at, - # repo_id, - # 'commit' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # augur_data.commits - # LEFT OUTER JOIN augur_data.contributors ON cntrb_email = cmt_author_email - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # repo_id = {repo_id} - # GROUP BY - # repo_id, - # canonical_email, - # canonical_id, - # commits.cmt_author_date, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # message.cntrb_id AS ID, - # created_at AS created_at, - # commits.repo_id, - # 'commit_comment' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - - # FROM - # 
augur_data.commit_comment_ref, - # augur_data.commits, - # augur_data.message - # LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # commits.cmt_id = commit_comment_ref.cmt_id - # AND commits.repo_id = {repo_id} - # AND commit_comment_ref.msg_id = message.msg_id - - # GROUP BY - # ID, - # commits.repo_id, - # commit_comment_ref.created_at, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # issue_events.cntrb_id AS ID, - # issue_events.created_at AS created_at, - # issues.repo_id, - # 'issue_closed' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # augur_data.issues, - # augur_data.issue_events - # LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = issue_events.cntrb_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, - # cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # issues.repo_id = {repo_id} - # AND issues.issue_id = issue_events.issue_id - # AND issues.pull_request IS NULL - # AND issue_events.cntrb_id IS NOT NULL - # AND ACTION = 'closed' - # GROUP BY - # issue_events.cntrb_id, - # issues.repo_id, - # issue_events.created_at, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # pr_augur_contributor_id AS ID, - # pr_created_at AS created_at, - # pull_requests.repo_id, - # 'open_pull_request' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # augur_data.pull_requests - # LEFT OUTER JOIN augur_data.contributors ON pull_requests.pr_augur_contributor_id = contributors.cntrb_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, - # cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # pull_requests.repo_id = {repo_id} - # GROUP BY - # pull_requests.pr_augur_contributor_id, - # pull_requests.repo_id, - # pull_requests.pr_created_at, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # message.cntrb_id AS ID, - # msg_timestamp AS created_at, - # pull_requests.repo_id as repo_id, - # 'pull_request_comment' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # augur_data.pull_requests, - # augur_data.pull_request_message_ref, - # augur_data.message - # LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, - # cntrb_id AS canonical_id - # 
FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # pull_requests.repo_id = {repo_id} - # AND pull_request_message_ref.pull_request_id = pull_requests.pull_request_id - # AND pull_request_message_ref.msg_id = message.msg_id - # GROUP BY - # message.cntrb_id, - # pull_requests.repo_id, - # message.msg_timestamp, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) UNION ALL - # ( - # SELECT - # issues.reporter_id AS ID, - # msg_timestamp AS created_at, - # issues.repo_id as repo_id, - # 'issue_comment' AS ACTION, - # contributors.cntrb_full_name AS full_name, - # contributors.cntrb_login AS login - # FROM - # issues, - # issue_message_ref, - # message - # LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id - # LEFT OUTER JOIN ( - # SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - # cntrb_canonical AS canonical_email, - # data_collection_date, - # cntrb_id AS canonical_id - # FROM augur_data.contributors - # WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - # ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - # WHERE - # issues.repo_id = {repo_id} - # AND issue_message_ref.msg_id = message.msg_id - # AND issues.issue_id = issue_message_ref.issue_id - # AND issues.pull_request_id = NULL - # GROUP BY - # issues.reporter_id, - # issues.repo_id, - # message.msg_timestamp, - # contributors.cntrb_full_name, - # contributors.cntrb_login - # ) - # ) A, - # repo - # WHERE - # ID IS NOT NULL - # AND A.repo_id = repo.repo_id - # GROUP BY - # A.ID, - # A.repo_id, - # A.ACTION, - # A.created_at, - # repo.repo_name, - # A.full_name, - # A.login - # ORDER BY - # cntrb_id - # ) b - # WHERE RANK IN {rank_tuple} - - # """) - # contributor_query2 = (""" - - # select count(*) from augur_data.repo; - # """) - - #with current_app.engine.connect() as conn: - # df = pd.read_sql(contributor_query2, conn) - - #df = df.loc[~df['full_name'].str.contains('bot', na=False)] - #df = df.loc[~df['login'].str.contains('bot', na=False)] - - #df = df.loc[~df['cntrb_id'].isin(df[df.duplicated(['cntrb_id', 'created_at', 'repo_id', 'rank'])]['cntrb_id'])] - - # add yearmonths to contributor - #df[['month', 'year']] = df[['month', 'year']].astype(int).astype(str) - #df['yearmonth'] = df['month'] + '/' + df['year'] - #df['yearmonth'] = pd.to_datetime(df['yearmonth']) - - # add column with every value being one, so when the contributor df is concatenated - # with the months df, the filler months won't be counted in the sums - #df['new_contributors'] = 1 - - # add quarters to contributor dataframe - #df['month'] = df['month'].astype(int) - #df['quarter'] = df.apply(lambda x: quarters(x['month'], x['year']), axis=1, result_type='reduce') - #df['quarter'] = pd.to_datetime(df['quarter']) - - df = [1] - return df - -def months_data_collection(start_date, end_date): - - # months_query makes a df of years and months, this is used to fill - # the months with no data in the visualizations - months_query = salc.sql.text(f""" - SELECT * - FROM - ( - SELECT - date_part( 'year', created_month :: DATE ) AS year, - date_part( 'month', created_month :: DATE ) AS MONTH - FROM - (SELECT * - FROM ( - SELECT created_month :: DATE - FROM generate_series (TIMESTAMP '{start_date}', TIMESTAMP '{end_date}', INTERVAL '1 month' ) created_month ) d ) x - ) y - """) - - with 
current_app.engine.connect() as conn: - months_df = pd.read_sql(months_query, conn) - - # add yearmonths to months_df - months_df[['year', 'month']] = months_df[['year', 'month']].astype(float).astype(int).astype(str) - months_df['yearmonth'] = months_df['month'] + '/' + months_df['year'] - months_df['yearmonth'] = pd.to_datetime(months_df['yearmonth']) - - # filter months_df with start_date and end_date, the contributor df is filtered in the visualizations - months_df = months_df.set_index(months_df['yearmonth']) - months_df = months_df.loc[start_date: end_date].reset_index(drop=True) - - # add quarters to months dataframe - months_df['month'] = months_df['month'].astype(int) - months_df['quarter'] = months_df.apply(lambda x: quarters(x['month'], x['year']), axis=1) - months_df['quarter'] = pd.to_datetime(months_df['quarter']) - - return months_df - -def get_repo_id_start_date_and_end_date(): - - now = datetime.datetime.now() - - repo_id = request.args.get('repo_id') - start_date = str(request.args.get('start_date', "{}-01-01".format(now.year - 1))) - end_date = str(request.args.get('end_date', "{}-{}-{}".format(now.year, now.month, now.day))) - - if repo_id: - - if start_date < end_date: - return int(repo_id), start_date, end_date, None - else: - - error = { - "message": "Invalid end_date. end_date is before the start_date", - "status_code": 400 - } - - return int(repo_id), None, None, error - - else: - error = { - "message": "repo_id not specified. Use this endpoint to get a list of available repos: http:///api/unstable/repos", - "status_code": 400 - } - return None, None, None, error - -def filter_out_repeats_without_required_contributions_in_required_time(repeat_list, repeats_df, required_time, - first_list): - - differences = [] - for i in range(0, len(repeat_list)): - time_difference = repeat_list[i] - first_list[i] - total = time_difference.days * 86400 + time_difference.seconds - differences.append(total) - repeats_df['differences'] = differences - - # remove contributions who made enough contributions, but not in a short enough time - repeats_df = repeats_df.loc[repeats_df['differences'] <= required_time * 86400] - - return repeats_df - -def compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, required_time, start_date): - - # create a copy of contributor dataframe - driver_df = input_df.copy() - - # remove first time contributors before begin date, along with their second contribution - mask = (driver_df['yearmonth'] < start_date) - driver_df = driver_df[~driver_df['cntrb_id'].isin(driver_df.loc[mask]['cntrb_id'])] - - # determine if contributor is a drive by by finding all the cntrb_id's that do not have a second contribution - repeats_df = driver_df.copy() - - repeats_df = repeats_df.loc[repeats_df['rank'].isin([1, required_contributions])] - - # removes all the contributors that only have a first contirbution - repeats_df = repeats_df[ - repeats_df['cntrb_id'].isin(repeats_df.loc[driver_df['rank'] == required_contributions]['cntrb_id'])] - - repeat_list = repeats_df.loc[driver_df['rank'] == required_contributions]['created_at'].tolist() - first_list = repeats_df.loc[driver_df['rank'] == 1]['created_at'].tolist() - - repeats_df = repeats_df.loc[driver_df['rank'] == 1] - repeats_df['type'] = 'repeat' - - repeats_df = filter_out_repeats_without_required_contributions_in_required_time( - repeat_list, repeats_df, required_time, first_list) - - repeats_df = repeats_df.loc[repeats_df['differences'] <= required_time * 86400] - - repeat_cntrb_ids = 
repeats_df['cntrb_id'].to_list() - - drive_by_df = driver_df.loc[~driver_df['cntrb_id'].isin(repeat_cntrb_ids)] - - drive_by_df = drive_by_df.loc[driver_df['rank'] == 1] - drive_by_df['type'] = 'drive_by' - - return drive_by_df, repeats_df - -def add_caption_to_visualizations(caption, required_contributions, required_time, plot_width): - - caption_plot = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - - caption_plot.add_layout(Label( - x=0, - y=160, - x_units='screen', - y_units='screen', - text='{}'.format(caption.format(required_contributions, required_time)), - text_font='times', - text_font_size='15pt', - render_mode='css' - )) - caption_plot.outline_line_color = None - - return caption_plot - -def format_new_cntrb_bar_charts(plot, rank, group_by_format_string): - - plot.xgrid.grid_line_color = None - plot.y_range.start = 0 - plot.axis.minor_tick_line_color = None - plot.outline_line_color = None - - plot.title.align = "center" - plot.title.text_font_size = "18px" - - plot.yaxis.axis_label = 'Second Time Contributors' if rank == 2 else 'New Contributors' - plot.xaxis.axis_label = group_by_format_string - - plot.xaxis.axis_label_text_font_size = "18px" - plot.yaxis.axis_label_text_font_size = "16px" - - plot.xaxis.major_label_text_font_size = "16px" - plot.xaxis.major_label_orientation = 45.0 - - plot.yaxis.major_label_text_font_size = "16px" - - return plot - -def add_charts_and_captions_to_correct_positions(chart_plot, caption_plot, rank, contributor_type, - row_1, row_2, row_3, row_4): - - if rank == 1 and (contributor_type == 'All' or contributor_type == 'repeat'): - row_1.append(chart_plot) - row_2.append(caption_plot) - elif rank == 2 or contributor_type == 'drive_by': - row_3.append(chart_plot) - row_4.append(caption_plot) - -def get_new_cntrb_bar_chart_query_params(): - - group_by = str(request.args.get('group_by', "quarter")) - required_contributions = int(request.args.get('required_contributions', 4)) - required_time = int(request.args.get('required_time', 365)) - - return group_by, required_contributions, required_time - -def remove_rows_before_start_date(df, start_date): - - mask = (df['yearmonth'] < start_date) - result_df = df[~df['cntrb_id'].isin(df.loc[mask]['cntrb_id'])] - - return result_df - -def remove_rows_with_null_values(df, not_null_columns=[]): - """Remove null data from pandas df - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- list_of_columns - description: columns that are searched for NULL values - type: list - default: [] (means all columns will be checked for NULL values) - IMPORTANT: if an empty list is passed or nothing is passed it will check all columns for NULL values - - Return Value - -- Modified Pandas Dataframe - """ - - if len(not_null_columns) == 0: - not_null_columns = df.columns.to_list() - - total_rows_removed = 0 - for col in not_null_columns: - rows_removed = len(df.loc[df[col].isnull() is True]) - - if rows_removed > 0: - print(f"{rows_removed} rows have been removed because of null values in column {col}") - total_rows_removed += rows_removed - - df = df.loc[df[col].isnull() is False] - - if total_rows_removed > 0: - print(f"\nTotal rows removed because of null data: {total_rows_removed}"); - else: - print("No null data found") - - return df - -def get_needed_columns(df, list_of_columns): - """Get only a specific list of columns from a Pandas Dataframe - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- 
list_of_columns - description: columns that will be kept in dataframe - type: list - - Return Value - -- Modified Pandas Dataframe - """ - return df[list_of_columns] - -def filter_data(df, needed_columns, not_null_columns=[]): - """Filters out the unneeded rows in the df, and removed NULL data from df - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- needed_columns - description: the columns to keep in the dataframe - - -- not_null_columns - description: columns that will be searched for NULL data, - if NULL values are found those rows will be removed - default: [] (means all columns in needed_columns list will be checked for NULL values) - IMPORTANT: if an empty list is passed or nothing is passed it will check - all columns in needed_columns list for NULL values - Return Value - -- Modified Pandas Dataframe - """ - - if all(x in needed_columns for x in not_null_columns): - - df = get_needed_columns(df, needed_columns) - #Use the pandas method bc the other method was erroring on boolean index. - #IM - 9/23/22 - df = df.dropna(subset=not_null_columns)#remove_rows_with_null_values(df, not_null_columns) - - return df - else: - print("Developer error, not null columns should be a subset of needed columns") - return df - -@app.route('/{}/contributor_reports/new_contributors_bar/'.format(AUGUR_API_VERSION), methods=["GET"]) -def new_contributors_bar(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by, required_contributions, required_time = get_new_cntrb_bar_chart_query_params() - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - months_df = months_data_collection(start_date=start_date, end_date=end_date) - - # TODO remove full_name from data for all charts since it is not needed in vis generation - not_null_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - #Use the pandas method bc the other method was erroring on boolean index. - #IM - 9/23/22 - input_df = input_df.dropna(subset=not_null_columns)#remove_rows_with_null_values(input_df, not_null_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - contributor_types = ['All', 'repeat', 'drive_by'] - ranks = [1, 2] - - row_1, row_2, row_3, row_4 = [], [], [], [] - - all_df = remove_rows_before_start_date(input_df, start_date) - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - for rank in ranks: - for contributor_type in contributor_types: - - # do not display these visualizations since drive-by's do not have second contributions, and the - # second contribution of a repeat contributor is the same thing as the all the second time contributors - if (rank == 2 and contributor_type == 'drive_by') or (rank == 2 and contributor_type == 'repeat'): - continue - - if contributor_type == 'repeat': - driver_df = repeats_df - - caption = """This graph shows repeat contributors in the specified time period. 
Repeat contributors - are contributors who have made {} or more contributions in {} days and their first contribution is - in the specified time period. New contributors are individuals who make their first contribution - in the specified time period.""" - - elif contributor_type == 'drive_by': - - driver_df = drive_by_df - - caption = """This graph shows fly by contributors in the specified time period. Fly by contributors - are contributors who make less than the required {} contributions in {} days. New contributors are - individuals who make their first contribution in the specified time period. Of course, then, “All - fly-by’s are by definition first time contributors”. However, not all first time contributors are - fly-by’s.""" - - elif contributor_type == 'All': - - if rank == 1: - driver_df = all_df - # makes df with all first time contributors - driver_df = driver_df.loc[driver_df['rank'] == 1] - caption = """This graph shows all the first time contributors, whether they contribute once, or - contribute multiple times. New contributors are individuals who make their first contribution - in the specified time period.""" - - if rank == 2: - - driver_df = all_df - - # creates df with all second time contributors - driver_df = driver_df.loc[driver_df['rank'] == 2] - caption = """This graph shows the second contribution of all - first time contributors in the specified time period.""" - # y_axis_label = 'Second Time Contributors' - - # filter by end_date, this is not done with the begin date filtering because a repeat contributor - # will look like drive-by if the second contribution is removed by end_date filtering - mask = (driver_df['yearmonth'] < end_date) - driver_df = driver_df.loc[mask] - - # adds all months to driver_df so the lists of dates will include all months and years - driver_df = pd.concat([driver_df, months_df]) - - data = pd.DataFrame() - if group_by == 'year': - - data['dates'] = driver_df[group_by].unique() - - # new contributor counts for y-axis - data['new_contributor_counts'] = driver_df.groupby([group_by]).sum().reset_index()[ - 'new_contributors'] - - # used to format x-axis and title - group_by_format_string = "Year" - - elif group_by == 'quarter' or group_by == 'month': - - # set variables to group the data by quarter or month - if group_by == 'quarter': - date_column = 'quarter' - group_by_format_string = "Quarter" - - elif group_by == 'month': - date_column = 'yearmonth' - group_by_format_string = "Month" - - # modifies the driver_df[date_column] to be a string with year and month, - # then finds all the unique values - data['dates'] = np.unique(np.datetime_as_string(driver_df[date_column], unit='M')) - - # new contributor counts for y-axis - data['new_contributor_counts'] = driver_df.groupby([date_column]).sum().reset_index()[ - 'new_contributors'] - - # if the data set is large enough it will dynamically assign the width, if the data set is - # too small it will by default set to 870 pixel so the title fits - if len(data['new_contributor_counts']) >= 15: - plot_width = 46 * len(data['new_contributor_counts']) - else: - plot_width = 870 - - # create a dict convert an integer number into a word - # used to turn the rank into a word, so it is nicely displayed in the title - numbers = ['Zero', 'First', 'Second'] - num_conversion_dict = {} - for i in range(1, len(numbers)): - num_conversion_dict[i] = numbers[i] - number = '{}'.format(num_conversion_dict[rank]) - - # define pot for bar chart - p = figure(x_range=data['dates'], plot_height=400, 
plot_width=plot_width, - title="{}: {} {} Time Contributors Per {}".format(repo_dict[repo_id], - contributor_type.capitalize(), number, - group_by_format_string), - y_range=(0, max(data['new_contributor_counts']) * 1.15), margin=(0, 0, 10, 0)) - - p.vbar(x=data['dates'], top=data['new_contributor_counts'], width=0.8) - - source = ColumnDataSource( - data=dict(dates=data['dates'], new_contributor_counts=data['new_contributor_counts'])) - - # add contributor_count labels to chart - p.add_layout(LabelSet(x='dates', y='new_contributor_counts', text='new_contributor_counts', y_offset=4, - text_font_size="13pt", text_color="black", - source=source, text_align='center')) - - plot = format_new_cntrb_bar_charts(p, rank, group_by_format_string) - - caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width) - - add_charts_and_captions_to_correct_positions(plot, caption_plot, rank, contributor_type, row_1, - row_2, row_3, row_4) - - # puts plots together into a grid - grid = gridplot([row_1, row_2, row_3, row_4]) - - filename = export_png(grid) - - return send_file(filename) - -@app.route('/{}/contributor_reports/new_contributors_stacked_bar/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def new_contributors_stacked_bar(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by, required_contributions, required_time = get_new_cntrb_bar_chart_query_params() - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - months_df = months_data_collection(start_date=start_date, end_date=end_date) - - needed_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - contributor_types = ['All', 'repeat', 'drive_by'] - ranks = [1, 2] - - row_1, row_2, row_3, row_4 = [], [], [], [] - - all_df = remove_rows_before_start_date(input_df, start_date) - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - for rank in ranks: - for contributor_type in contributor_types: - # do not display these visualizations since drive-by's do not have second contributions, - # and the second contribution of a repeat contributor is the same thing as the all the - # second time contributors - if (rank == 2 and contributor_type == 'drive_by') or (rank == 2 and contributor_type == 'repeat'): - continue - - if contributor_type == 'repeat': - driver_df = repeats_df - - caption = """This graph shows repeat contributors in the specified time period. Repeat contributors - are contributors who have made {} or more contributions in {} days and their first contribution is - in the specified time period. New contributors are individuals who make their first contribution in - the specified time period.""" - - elif contributor_type == 'drive_by': - - driver_df = drive_by_df - - caption = """This graph shows fly by contributors in the specified time period. 
Fly by contributors - are contributors who make less than the required {} contributions in {} days. New contributors are - individuals who make their first contribution in the specified time period. Of course, then, “All - fly-by’s are by definition first time contributors”. However, not all first time contributors are - fly-by’s.""" - - elif contributor_type == 'All': - if rank == 1: - driver_df = all_df - - # makes df with all first time contributors - driver_df = driver_df.loc[driver_df['rank'] == 1] - - caption = """This graph shows all the first time contributors, whether they contribute once, or - contribute multiple times. New contributors are individuals who make their first contribution in - the specified time period.""" - - if rank == 2: - driver_df = all_df - - # creates df with all second time contributor - driver_df = driver_df.loc[driver_df['rank'] == 2] - caption = """This graph shows the second contribution of all first time - contributors in the specified time period.""" - # y_axis_label = 'Second Time Contributors' - - # filter by end_date, this is not done with the begin date filtering because a repeat contributor will - # look like drive-by if the second contribution is removed by end_date filtering - mask = (driver_df['yearmonth'] < end_date) - driver_df = driver_df.loc[mask] - - # adds all months to driver_df so the lists of dates will include all months and years - driver_df = pd.concat([driver_df, months_df]) - - actions = ['open_pull_request', 'pull_request_comment', 'commit', 'issue_closed', 'issue_opened', - 'issue_comment'] - - data = pd.DataFrame() - if group_by == 'year': - - # x-axis dates - data['dates'] = driver_df[group_by].unique() - - for contribution_type in actions: - data[contribution_type] = \ - pd.concat([driver_df.loc[driver_df['action'] == contribution_type], months_df]).groupby( - group_by).sum().reset_index()['new_contributors'] - - # new contributor counts for all actions - data['new_contributor_counts'] = driver_df.groupby([group_by]).sum().reset_index()[ - 'new_contributors'] - - # used to format x-axis and graph title - group_by_format_string = "Year" - - elif group_by == 'quarter' or group_by == 'month': - - # set variables to group the data by quarter or month - if group_by == 'quarter': - date_column = 'quarter' - group_by_format_string = "Quarter" - - elif group_by == 'month': - date_column = 'yearmonth' - group_by_format_string = "Month" - - # modifies the driver_df[date_column] to be a string with year and month, - # then finds all the unique values - data['dates'] = np.unique(np.datetime_as_string(driver_df[date_column], unit='M')) - - # new_contributor counts for each type of action - for contribution_type in actions: - data[contribution_type] = \ - pd.concat([driver_df.loc[driver_df['action'] == contribution_type], months_df]).groupby( - date_column).sum().reset_index()['new_contributors'] - - print(data.to_string()) - - # new contributor counts for all actions - data['new_contributor_counts'] = driver_df.groupby([date_column]).sum().reset_index()[ - 'new_contributors'] - - # if the data set is large enough it will dynamically assign the width, if the data set is too small it - # will by default set to 870 pixel so the title fits - if len(data['new_contributor_counts']) >= 15: - plot_width = 46 * len(data['new_contributor_counts']) + 200 - else: - plot_width = 870 - - # create list of values for data source dict - actions_df_references = [] - for action in actions: - actions_df_references.append(data[action]) - - # created dict with 
the actions as the keys, and the values as the values from the df - data_source = {actions[i]: actions_df_references[i] for i in range(len(actions))} - data_source.update({'dates': data['dates'], 'New Contributor Counts': data['new_contributor_counts']}) - - colors = Colorblind[len(actions)] - - source = ColumnDataSource(data=data_source) - - # create a dict convert an integer number into a word - # used to turn the rank into a word, so it is nicely displayed in the title - numbers = ['Zero', 'First', 'Second'] - num_conversion_dict = {} - for i in range(1, len(numbers)): - num_conversion_dict[i] = numbers[i] - number = '{}'.format(num_conversion_dict[rank]) - - # y_max = 20 - # creates plot to hold chart - p = figure(x_range=data['dates'], plot_height=400, plot_width=plot_width, - title='{}: {} {} Time Contributors Per {}'.format(repo_dict[repo_id], - contributor_type.capitalize(), number, - group_by_format_string), - toolbar_location=None, y_range=(0, max(data['new_contributor_counts']) * 1.15)) - # max(data['new_contributor_counts'])* 1.15), margin = (0, 0, 0, 0)) - - vbar = p.vbar_stack(actions, x='dates', width=0.8, color=colors, source=source) - - # add total count labels - p.add_layout(LabelSet(x='dates', y='New Contributor Counts', text='New Contributor Counts', y_offset=4, - text_font_size="14pt", - text_color="black", source=source, text_align='center')) - - # add legend - legend = Legend(items=[(date, [action]) for (date, action) in zip(actions, vbar)], location=(0, 120), - label_text_font_size="16px") - p.add_layout(legend, 'right') - - plot = format_new_cntrb_bar_charts(p, rank, group_by_format_string) - - caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width) - - add_charts_and_captions_to_correct_positions(plot, caption_plot, rank, contributor_type, row_1, - row_2, row_3, row_4) - - # puts plots together into a grid - grid = gridplot([row_1, row_2, row_3, row_4]) - - filename = export_png(grid) - - return send_file(filename) - -@app.route('/{}/contributor_reports/returning_contributors_pie_chart/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def returning_contributors_pie_chart(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - required_contributions = int(request.args.get('required_contributions', 4)) - required_time = int(request.args.get('required_time', 365)) - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - - needed_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - print(repeats_df.to_string()) - - driver_df = pd.concat([drive_by_df, repeats_df]) - - # filter df by end date - mask = (driver_df['yearmonth'] < end_date) - driver_df = driver_df.loc[mask] - - # first and second time contributor counts - drive_by_contributors = driver_df.loc[driver_df['type'] 
== 'drive_by'].count()['new_contributors'] - repeat_contributors = driver_df.loc[driver_df['type'] == 'repeat'].count()['new_contributors'] - - # create a dict with the # of drive-by and repeat contributors - x = {'Drive_By': drive_by_contributors, - 'Repeat': repeat_contributors} - - # turn dict 'x' into a dataframe with columns 'contributor_type', and 'counts' - data = pd.Series(x).reset_index(name='counts').rename(columns={'index': 'contributor_type'}) - - data['angle'] = data['counts'] / data['counts'].sum() * 2 * pi - data['color'] = ('#0072B2', '#E69F00') - data['percentage'] = ((data['angle'] / (2 * pi)) * 100).round(2) - - # format title - title = "{}: Number of Returning " \ - "Contributors out of {} from {} to {}" \ - .format(repo_dict[repo_id], drive_by_contributors + repeat_contributors, start_date, end_date) - - title_text_font_size = 18 - - plot_width = 850 - - # sets plot_width to width of title if title is wider than 850 pixels - if len(title) * title_text_font_size / 2 > plot_width: - plot_width = int(len(title) * title_text_font_size / 2) - - # creates plot for chart - p = figure(plot_height=450, plot_width=plot_width, title=title, - toolbar_location=None, x_range=(-0.5, 1.3), tools='hover', tooltips="@contributor_type", - margin=(0, 0, 0, 0)) - - p.wedge(x=0.87, y=1, radius=0.4, start_angle=cumsum('angle', include_zero=True), - end_angle=cumsum('angle'), line_color=None, fill_color='color', - legend_field='contributor_type', source=data) - - start_point = 0.88 - for i in range(0, len(data['percentage'])): - # percentages - p.add_layout(Label(x=-0.17, y=start_point + 0.13 * (len(data['percentage']) - 1 - i), - text='{}%'.format(data.iloc[i]['percentage']), - render_mode='css', text_font_size='15px', text_font_style='bold')) - - # contributors - p.add_layout(Label(x=0.12, y=start_point + 0.13 * (len(data['percentage']) - 1 - i), - text='{}'.format(data.iloc[i]['counts']), - render_mode='css', text_font_size='15px', text_font_style='bold')) - - # percentages header - p.add_layout( - Label(x=-0.22, y=start_point + 0.13 * (len(data['percentage'])), text='Percentages', render_mode='css', - text_font_size='15px', text_font_style='bold')) - - # legend header - p.add_layout( - Label(x=-0.43, y=start_point + 0.13 * (len(data['percentage'])), text='Category', render_mode='css', - text_font_size='15px', text_font_style='bold')) - - # contributors header - p.add_layout( - Label(x=0, y=start_point + 0.13 * (len(data['percentage'])), text='# Contributors', render_mode='css', - text_font_size='15px', text_font_style='bold')) - - p.axis.axis_label = None - p.axis.visible = False - p.grid.grid_line_color = None - - p.title.align = "center" - p.title.text_font_size = "{}px".format(title_text_font_size) - - p.legend.location = "center_left" - p.legend.border_line_color = None - p.legend.label_text_font_style = 'bold' - p.legend.label_text_font_size = "15px" - - plot = p - - caption = """This pie chart shows the percentage of new contributors who were fly-by or repeat contributors. - Fly by contributors are contributors who make less than the required {0} contributions in {1} days. - New contributors are individuals who make their first contribution in the specified time period. 
- Repeat contributors are contributors who have made {0} or more contributions in {1} days and their - first contribution is in the specified time period.""" - - caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width) - - # put graph and caption plot together into one grid - grid = gridplot([[plot], [caption_plot]]) - - filename = export_png(grid) - - return send_file(filename) - -@app.route('/{}/contributor_reports/returning_contributors_stacked_bar/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def returning_contributors_stacked_bar(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by = str(request.args.get('group_by', "quarter")) - required_contributions = int(request.args.get('required_contributions', 4)) - required_time = int(request.args.get('required_time', 365)) - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - months_df = months_data_collection(start_date=start_date, end_date=end_date) - - needed_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - driver_df = pd.concat([drive_by_df, repeats_df, months_df]) - - # filter by end_date - mask = (driver_df['yearmonth'] < end_date) - driver_df = driver_df.loc[mask] - - # create df to hold data needed for chart - data = pd.DataFrame() - if group_by == 'year': - - # x-axis dates - data['dates'] = driver_df[group_by].unique() - - data['repeat_counts'] = \ - driver_df.loc[driver_df['type'] == 'repeat'].groupby(group_by).count().reset_index()['new_contributors'] - data['drive_by_counts'] = \ - driver_df.loc[driver_df['type'] == 'drive_by'].groupby(group_by).count().reset_index()[ - 'new_contributors'] - - # new contributor counts for all contributor counts - total_counts = [] - for i in range(0, len(data['drive_by_counts'])): - total_counts.append(data.iloc[i]['drive_by_counts'] + data.iloc[i]['repeat_counts']) - data['total_counts'] = total_counts - - # used to format x-axis and graph title - group_by_format_string = "Year" - - # font size of drive by and repeat labels - label_text_font_size = "14pt" - - elif group_by == 'quarter' or group_by == 'month': - - # set variables to group the data by quarter or month - if group_by == 'quarter': - date_column = 'quarter' - group_by_format_string = "Quarter" - - elif group_by == 'month': - date_column = 'yearmonth' - group_by_format_string = "Month" - - # modifies the driver_df[date_column] to be a string with year and month, then finds all the unique values - data['dates'] = np.unique(np.datetime_as_string(driver_df[date_column], unit='M')) - data['drive_by_counts'] = pd.concat([driver_df.loc[driver_df['type'] == 'drive_by'], months_df]).groupby( - date_column).sum().reset_index()['new_contributors'] - data['repeat_counts'] = pd.concat([driver_df.loc[driver_df['type'] == 
'repeat'], months_df]).groupby( - date_column).sum().reset_index()['new_contributors'] - - # new contributor counts for all contributor types - total_counts = [] - for i in range(0, len(data['drive_by_counts'])): - total_counts.append(data.iloc[i]['drive_by_counts'] + data.iloc[i]['repeat_counts']) - data['total_counts'] = total_counts - - # font size of drive by and repeat labels - label_text_font_size = "13pt" - - data_source = {'Dates': data['dates'], - 'Fly By': data['drive_by_counts'], - 'Repeat': data['repeat_counts'], - 'All': data['total_counts']} - - groups = ["Fly By", "Repeat"] - - colors = ['#56B4E9', '#E69F00'] - - source = ColumnDataSource(data=data_source) - - # format title - title_text_font_size = 18 - - # if the data set is large enough it will dynamically assign the width, if the data set - # is too small it will by default set to 780 pixel so the title fits - if len(data['total_counts']) >= 13: - plot_width = 46 * len(data['total_counts']) + 210 - else: - plot_width = 780 - - p = figure(x_range=data['dates'], plot_height=500, plot_width=plot_width, - title="{}: Fly By and Repeat Contributor Counts per {}".format(repo_dict[repo_id], - group_by_format_string), - toolbar_location=None, y_range=(0, max(total_counts) * 1.15), margin=(0, 0, 0, 0)) - - vbar = p.vbar_stack(groups, x='Dates', width=0.8, color=colors, source=source) - - # add total counts above bars - p.add_layout(LabelSet(x='Dates', y='All', text='All', y_offset=8, text_font_size="14pt", - text_color="black", source=source, text_align='center')) - - # add drive by count labels - p.add_layout(LabelSet(x='Dates', y='Fly By', text='Fly By', y_offset=-22, text_font_size=label_text_font_size, - text_color="black", source=source, text_align='center')) - - # add repeat count labels - p.add_layout(LabelSet(x='Dates', y='All', text='Repeat', y_offset=-22, text_font_size=label_text_font_size, - text_color="black", source=source, text_align='center')) - - # add legend - legend = Legend(items=[(date, [group]) for (date, group) in zip(groups, vbar)], location=(0, 200), - label_text_font_size="16px") - p.add_layout(legend, 'right') - - p.xgrid.grid_line_color = None - p.y_range.start = 0 - p.axis.minor_tick_line_color = None - p.outline_line_color = None - - p.title.align = "center" - p.title.text_font_size = "{}px".format(title_text_font_size) - - p.yaxis.axis_label = '# Contributors' - p.xaxis.axis_label = group_by_format_string - - p.xaxis.axis_label_text_font_size = "18px" - p.yaxis.axis_label_text_font_size = "16px" - - p.xaxis.major_label_text_font_size = "16px" - p.xaxis.major_label_orientation = 45.0 - - p.yaxis.major_label_text_font_size = "16px" - - p.legend.label_text_font_size = "20px" - - plot = p - - caption = """This graph shows the number of new contributors in the specified time period, and indicates how - many were fly-by and repeat contributors. Fly by contributors are contributors who make less than the required - {0} contributions in {1} days. New contributors are individuals who make their first contribution in the - specified time period. 
Repeat contributors are contributors who have made {0} or more contributions in {1} - days and their first contribution is in the specified time period.""" - - caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width) - - # put graph and caption plot together into one grid - grid = gridplot([[plot], [caption_plot]]) - - filename = export_png(grid) - - return send_file(filename) diff --git a/augur/api/routes/dei.py b/augur/api/routes/dei.py index 621c89604d..646081ba2c 100644 --- a/augur/api/routes/dei.py +++ b/augur/api/routes/dei.py @@ -69,7 +69,7 @@ def dei_track_repo(application: ClientApplication): "repo_id": repo_id } - enabled_phase_names = get_enabled_phase_names_from_config() + enabled_phase_names = get_enabled_phase_names_from_config_session(session, logger) #Primary collection hook. primary_enabled_phases = [] diff --git a/augur/api/routes/pull_request_reports.py b/augur/api/routes/pull_request_reports.py deleted file mode 100644 index 13aea31e8d..0000000000 --- a/augur/api/routes/pull_request_reports.py +++ /dev/null @@ -1,1922 +0,0 @@ -# import psycopg2 -import pandas as pd -import sqlalchemy as salc -import numpy as np -import warnings -import datetime -import json -# from scipy import stats -from flask import request, send_file, Response, current_app -import math - -from bokeh.palettes import Colorblind, mpl, Category20 -from bokeh.layouts import gridplot, column -from bokeh.models.annotations import Title -from bokeh.io import export_png, show # get_screenshot_as_png -# from bokeh.io.export import get_screenshot_as_png -from bokeh.embed import json_item -from bokeh.models import ColumnDataSource, Legend, LabelSet, Range1d, Label, FactorRange, BasicTicker, ColorBar, \ - LinearColorMapper, PrintfTickFormatter -from bokeh.plotting import figure -from bokeh.models.glyphs import Rect -from bokeh.transform import dodge, factor_cmap, transform - -# from selenium.webdriver import Firefox, FirefoxOptions -# options = FirefoxOptions() -# options.headless = True -# webdriver = Firefox(options=options) -#export_png(item, path, webdriver=webdriver) - -warnings.filterwarnings('ignore') - -from augur.api.routes import AUGUR_API_VERSION -from ..server import app - -def pull_request_data_collection(repo_id, start_date, end_date): - - pr_query = salc.sql.text(f""" - SELECT - repo.repo_id AS repo_id, - pull_requests.pr_src_id AS pr_src_id, - repo.repo_name AS repo_name, - pr_src_author_association, - repo_groups.rg_name AS repo_group, - pull_requests.pr_src_state, - pull_requests.pr_merged_at, - pull_requests.pr_created_at AS pr_created_at, - pull_requests.pr_closed_at AS pr_closed_at, - date_part( 'year', pr_created_at :: DATE ) AS CREATED_YEAR, - date_part( 'month', pr_created_at :: DATE ) AS CREATED_MONTH, - date_part( 'year', pr_closed_at :: DATE ) AS CLOSED_YEAR, - date_part( 'month', pr_closed_at :: DATE ) AS CLOSED_MONTH, - pr_src_meta_label, - pr_head_or_base, - ( EXTRACT ( EPOCH FROM pull_requests.pr_closed_at ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 3600 AS hours_to_close, - ( EXTRACT ( EPOCH FROM pull_requests.pr_closed_at ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 86400 AS days_to_close, - ( EXTRACT ( EPOCH FROM first_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 3600 AS hours_to_first_response, - ( EXTRACT ( EPOCH FROM first_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 86400 AS days_to_first_response, - ( EXTRACT ( EPOCH FROM last_response_time ) 
- EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 3600 AS hours_to_last_response, - ( EXTRACT ( EPOCH FROM last_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 86400 AS days_to_last_response, - first_response_time, - last_response_time, - EXTRACT ( EPOCH FROM average_time_between_responses), - assigned_count, - review_requested_count, - labeled_count, - subscribed_count, - mentioned_count, - referenced_count, - closed_count, - head_ref_force_pushed_count, - merged_count::INT, - milestoned_count, - unlabeled_count, - head_ref_deleted_count, - comment_count, - COALESCE(lines_added, 0) as lines_added, - COALESCE(lines_removed, 0) as lines_removed, - commit_count, - COALESCE(file_count, 0) as file_count - FROM - repo, - repo_groups, - pull_requests LEFT OUTER JOIN ( - SELECT pull_requests.pull_request_id, - count(*) FILTER (WHERE action = 'assigned') AS assigned_count, - count(*) FILTER (WHERE action = 'review_requested') AS review_requested_count, - count(*) FILTER (WHERE action = 'labeled') AS labeled_count, - count(*) FILTER (WHERE action = 'unlabeled') AS unlabeled_count, - count(*) FILTER (WHERE action = 'subscribed') AS subscribed_count, - count(*) FILTER (WHERE action = 'mentioned') AS mentioned_count, - count(*) FILTER (WHERE action = 'referenced') AS referenced_count, - count(*) FILTER (WHERE action = 'closed') AS closed_count, - count(*) FILTER (WHERE action = 'head_ref_force_pushed') AS head_ref_force_pushed_count, - count(*) FILTER (WHERE action = 'head_ref_deleted') AS head_ref_deleted_count, - count(*) FILTER (WHERE action = 'milestoned') AS milestoned_count, - COALESCE(count(*) FILTER (WHERE action = 'merged'), 0) AS merged_count, - COALESCE(MIN(message.msg_timestamp), pull_requests.pr_merged_at, pull_requests.pr_closed_at) AS first_response_time, - COALESCE(COUNT(DISTINCT message.msg_timestamp), 0) AS comment_count, - COALESCE(MAX(message.msg_timestamp), pull_requests.pr_closed_at) AS last_response_time, - COALESCE((MAX(message.msg_timestamp) - MIN(message.msg_timestamp)) / COUNT(DISTINCT message.msg_timestamp), pull_requests.pr_created_at - pull_requests.pr_closed_at) AS average_time_between_responses - FROM pull_requests - LEFT OUTER JOIN pull_request_events on pull_requests.pull_request_id = pull_request_events.pull_request_id - JOIN repo on repo.repo_id = pull_requests.repo_id - LEFT OUTER JOIN pull_request_message_ref on pull_requests.pull_request_id = pull_request_message_ref.pull_request_id - LEFT OUTER JOIN message on pull_request_message_ref.msg_id = message.msg_id - WHERE repo.repo_id = {repo_id} - GROUP BY pull_requests.pull_request_id - ) response_times - ON pull_requests.pull_request_id = response_times.pull_request_id - LEFT JOIN ( - SELECT pull_request_commits.pull_request_id, count(DISTINCT pr_cmt_sha) AS commit_count - FROM pull_request_commits, pull_requests, pull_request_meta - WHERE pull_requests.pull_request_id = pull_request_commits.pull_request_id - AND pull_requests.pull_request_id = pull_request_meta.pull_request_id - AND pull_requests.repo_id = {repo_id} - AND pr_cmt_sha <> pull_requests.pr_merge_commit_sha - AND pr_cmt_sha <> pull_request_meta.pr_sha - GROUP BY pull_request_commits.pull_request_id - ) all_commit_counts - ON pull_requests.pull_request_id = all_commit_counts.pull_request_id - LEFT JOIN ( - SELECT MAX(pr_repo_meta_id), pull_request_meta.pull_request_id, pr_head_or_base, pr_src_meta_label - FROM pull_requests, pull_request_meta - WHERE pull_requests.pull_request_id = pull_request_meta.pull_request_id - AND 
pull_requests.repo_id = {repo_id} - AND pr_head_or_base = 'base' - GROUP BY pull_request_meta.pull_request_id, pr_head_or_base, pr_src_meta_label - ) base_labels - ON base_labels.pull_request_id = all_commit_counts.pull_request_id - LEFT JOIN ( - SELECT sum(cmt_added) AS lines_added, sum(cmt_removed) AS lines_removed, pull_request_commits.pull_request_id, count(DISTINCT cmt_filename) AS file_count - FROM pull_request_commits, commits, pull_requests, pull_request_meta - WHERE cmt_commit_hash = pr_cmt_sha - AND pull_requests.pull_request_id = pull_request_commits.pull_request_id - AND pull_requests.pull_request_id = pull_request_meta.pull_request_id - AND pull_requests.repo_id = {repo_id} - AND commits.repo_id = pull_requests.repo_id - AND commits.cmt_commit_hash <> pull_requests.pr_merge_commit_sha - AND commits.cmt_commit_hash <> pull_request_meta.pr_sha - GROUP BY pull_request_commits.pull_request_id - ) master_merged_counts - ON base_labels.pull_request_id = master_merged_counts.pull_request_id - WHERE - repo.repo_group_id = repo_groups.repo_group_id - AND repo.repo_id = pull_requests.repo_id - AND repo.repo_id = {repo_id} - ORDER BY - merged_count DESC - """) - - with current_app.engine.connect() as conn: - pr_all = pd.read_sql(pr_query, conn) - - pr_all[['assigned_count', - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'mentioned_count', - 'referenced_count', - 'closed_count', - 'head_ref_force_pushed_count', - 'merged_count', - 'milestoned_count', - 'unlabeled_count', - 'head_ref_deleted_count', - 'comment_count', - 'commit_count', - 'file_count', - 'lines_added', - 'lines_removed' - ]] = pr_all[['assigned_count', - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'mentioned_count', - 'referenced_count', - 'closed_count', - 'head_ref_force_pushed_count', - 'merged_count', - 'milestoned_count', - 'unlabeled_count', - 'head_ref_deleted_count', - 'comment_count', - 'commit_count', - 'file_count', - 'lines_added', - 'lines_removed' - ]].astype(float) - # Change years to int so that doesn't display as 2019.0 for example - pr_all[['created_year', 'closed_year']] = pr_all[['created_year', 'closed_year']].fillna(-1).astype(int).astype( - str) - - start_date = pd.to_datetime(start_date) - # end_date = pd.to_datetime('2020-02-01 09:00:00') - end_date = pd.to_datetime(end_date) - pr_all = pr_all[(pr_all['pr_created_at'] > start_date) & (pr_all['pr_closed_at'] < end_date)] - - pr_all['created_year'] = pr_all['created_year'].map(int) - pr_all['created_month'] = pr_all['created_month'].map(int) - pr_all['created_month'] = pr_all['created_month'].map(lambda x: '{0:0>2}'.format(x)) - pr_all['created_yearmonth'] = pd.to_datetime( - pr_all['created_year'].map(str) + '-' + pr_all['created_month'].map(str) + '-01') - - # getting the number of days of (today - created at) for the PRs that are still open - # and putting this in the days_to_close column - - # get timedeltas of creation time to todays date/time - days_to_close_open_pr = datetime.datetime.now() - pr_all.loc[pr_all['pr_src_state'] == 'open']['pr_created_at'] - - # get num days from above timedelta - days_to_close_open_pr = days_to_close_open_pr.apply(lambda x: x.days).astype(int) - - # for only OPEN pr's, set the days_to_close column equal to above dataframe - pr_all.loc[pr_all['pr_src_state'] == 'open'] = pr_all.loc[pr_all['pr_src_state'] == 'open'].assign( - days_to_close=days_to_close_open_pr) - - pr_all.loc[pr_all['pr_src_state'] == 'open'].head() - - # initiate column by setting all null 
datetimes - pr_all['closed_yearmonth'] = pd.to_datetime(np.nan) - - # Fill column with prettified string of year/month closed that looks like: 2019-07-01 - pr_all.loc[pr_all['pr_src_state'] == 'closed'] = pr_all.loc[pr_all['pr_src_state'] == 'closed'].assign( - closed_yearmonth=pd.to_datetime(pr_all.loc[pr_all['pr_src_state'] == 'closed']['closed_year'].astype(int - ).map( - str) + '-' + pr_all.loc[pr_all['pr_src_state'] == 'closed']['closed_month'].astype(int).map( - str) + '-01')) - - """ Merged flag """ - if 'pr_merged_at' in pr_all.columns.values: - pr_all['pr_merged_at'] = pr_all['pr_merged_at'].fillna(0) - pr_all['merged_flag'] = 'Not Merged / Rejected' - pr_all['merged_flag'].loc[pr_all['pr_merged_at'] != 0] = 'Merged / Accepted' - pr_all['merged_flag'].loc[pr_all['pr_src_state'] == 'open'] = 'Still Open' - del pr_all['pr_merged_at'] - - # Isolate the different state PRs for now - pr_open = pr_all.loc[pr_all['pr_src_state'] == 'open'] - pr_closed = pr_all.loc[pr_all['pr_src_state'] == 'closed'] - pr_merged = pr_all.loc[pr_all['merged_flag'] == 'Merged / Accepted'] - pr_not_merged = pr_all.loc[pr_all['merged_flag'] == 'Not Merged / Rejected'] - - # Filtering the 80th percentile slowest PRs - def filter_20_per_slowest(input_df): - pr_slow20_filtered = pd.DataFrame() - pr_slow20_x = pd.DataFrame() - pr_slow20_filtered = input_df.copy() - pr_slow20_filtered['percentile_rank_local'] = pr_slow20_filtered.days_to_close.rank(pct=True) - pr_slow20_filtered = pr_slow20_filtered.query('percentile_rank_local >= .8', ) - - return pr_slow20_filtered - - pr_slow20_open = filter_20_per_slowest(pr_open) - pr_slow20_closed = filter_20_per_slowest(pr_closed) - pr_slow20_merged = filter_20_per_slowest(pr_merged) - pr_slow20_not_merged = filter_20_per_slowest(pr_not_merged) - pr_slow20_all = filter_20_per_slowest(pr_all) - - return pr_all, pr_open, pr_closed, pr_merged, pr_not_merged, pr_slow20_all, pr_slow20_open, pr_slow20_closed, pr_slow20_merged, pr_slow20_not_merged - -def remove_outliers(input_df, field, num_outliers_repo_map): - df_no_outliers = input_df.copy() - for repo_name, num_outliers in num_outliers_repo_map.items(): - indices_to_drop = input_df.loc[input_df['repo_name'] == repo_name].nlargest(num_outliers, field).index - df_no_outliers = df_no_outliers.drop(index=indices_to_drop) - return df_no_outliers - -def remove_outliers_by_standard_deviation(input_df, column): - '''Takes a dataframe and a numeric column name. - Then removes all rows that are more than 3 standard deviations from the mean. 
- Returns a df without outliers, the # of outliers removed, outlier cutoff value''' - - # finds rows that are more than 3 standard deviations from the mean - outlier_cutoff = input_df[column].mean() + (3 * input_df[column].std()) - outlier_mask = input_df[column] > outlier_cutoff - - # determine number of outliers - outliers_removed = len(input_df.loc[outlier_mask]) - - df_no_outliers = input_df.loc[~outlier_mask] - - return df_no_outliers, outliers_removed, outlier_cutoff - -def hex_to_RGB(hex): - ''' "#FFFFFF" -> [255,255,255] ''' - # Pass 16 to the integer function for change of base - return [int(hex[i:i + 2], 16) for i in range(1, 6, 2)] - -def color_dict(gradient): - ''' Takes in a list of RGB sub-lists and returns dictionary of - colors in RGB and hex form for use in a graphing function - defined later on ''' - return {"hex": [RGB_to_hex(RGB) for RGB in gradient], - "r": [RGB[0] for RGB in gradient], - "g": [RGB[1] for RGB in gradient], - "b": [RGB[2] for RGB in gradient]} - -def RGB_to_hex(RGB): - ''' [255,255,255] -> "#FFFFFF" ''' - # Components need to be integers for hex to make sense - RGB = [int(x) for x in RGB] - return "#" + "".join(["0{0:x}".format(v) if v < 16 else - "{0:x}".format(v) for v in RGB]) - -def linear_gradient(start_hex, finish_hex="#FFFFFF", n=10): - ''' returns a gradient list of (n) colors between - two hex colors. start_hex and finish_hex - should be the full six-digit color string, - including the number sign ("#FFFFFF") ''' - # Starting and ending colors in RGB form - s = hex_to_RGB(start_hex) - f = hex_to_RGB(finish_hex) - # Initialize a list of the output colors with the starting color - RGB_list = [s] - # Calculate a color at each evenly spaced value of t from 1 to n - for t in range(1, n): - # Interpolate RGB vector for color at the current value of t - curr_vector = [ - int(s[j] + (float(t) / (n - 1)) * (f[j] - s[j])) - for j in range(3) - ] - # Add it to our list of output colors - RGB_list.append(curr_vector) - - return color_dict(RGB_list) - -# dict of df types, and their location in the tuple that the function pull_request_data_collection returns -def get_df_tuple_locations(): - return {"pr_all": 0, "pr_open": 1, "pr_closed": 2, "pr_merged": 3, "pr_not_merged": 4, "pr_slow20_all": 5, - "pr_slow20_open": 6, "pr_slow20_closed": 7, "pr_slow20_merged": 8, "pr_slow20_not_merged": 9} - -def add_caption_to_plot(caption_plot, caption): - - caption_plot.add_layout(Label( - x=0, # Change to shift caption left or right - y=160, - x_units='screen', - y_units='screen', - text='{}'.format(caption), - text_font='times', # Use same font as paper - text_font_size='15pt', - render_mode='css' - )) - caption_plot.outline_line_color = None - - return caption_plot - -def remove_rows_with_null_values(df, not_null_columns=[]): - """Remove null data from pandas df - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- not_null_columns - description: columns that are searched for NULL values - type: list - default: [] (means all columns will be checked for NULL values) - IMPORTANT: if an empty list is passed or nothing is passed it will check all columns for NULL values - - Return Value - -- Modified Pandas Dataframe - """ - - if len(not_null_columns) == 0: - not_null_columns = df.columns.to_list() - - total_rows_removed = 0 - for col in not_null_columns: - rows_removed = len(df.loc[df[col].isnull()]) - #rows_removed = len(df.loc[df[col].isnull() is True]) - - if rows_removed > 0: - print(f"{rows_removed} rows have been 
removed because of null values in column {col}") - total_rows_removed += rows_removed - - df = df.loc[df[col].isnull() is False] - - if total_rows_removed > 0: - print(f"\nTotal rows removed because of null data: {total_rows_removed}"); - else: - print("No null data found") - - return df - -def get_needed_columns(df, list_of_columns): - """Get only a specific list of columns from a Pandas Dataframe - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- list_of_columns - description: columns that will be kept in dataframe - type: list - - Return Value - -- Modified Pandas Dataframe - """ - return df[list_of_columns] - -def filter_data(df, needed_columns, not_null_columns=[]): - """Filters out the unneeded rows in the df, and removed NULL data from df - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- needed_columns - description: the columns to keep in the dataframe - - -- not_null_columns - description: columns that will be searched for NULL data, - if NULL values are found those rows will be removed - default: [] (means all columns in needed_columns list will be checked for NULL values) - IMPORTANT: if an empty list is passed or nothing is passed it will check - all columns in needed_columns list for NULL values - Return Value - -- Modified Pandas Dataframe - """ - - if all(x in needed_columns for x in not_null_columns): - - df = get_needed_columns(df, needed_columns) - #Use the pandas method bc the other method was erroring on boolean index. - #IM - 9/23/22 - df = df.dropna(subset=not_null_columns)#remove_rows_with_null_values(df, not_null_columns) - - return df - else: - print("Developer error, not null columns should be a subset of needed columns") - return df - -def get_repo_id_start_date_and_end_date(): - - """ Gets the repo_id, start_date, and end_date from the GET requests array - - :return: repo_id - id of the repo data is being retrieved for - :return: start_date - earliest time on visualization. Defaults to the January 1st of last year - :return: end_date - latest time on visualization. Defaults to current date - """ - - now = datetime.datetime.now() - - repo_id = request.args.get('repo_id') - start_date = str(request.args.get('start_date', "{}-01-01".format(now.year - 1))) - end_date = str(request.args.get('end_date', "{}-{}-{}".format(now.year, now.month, now.day))) - - if repo_id: - - if start_date < end_date: - return int(repo_id), start_date, end_date, None - else: - - error = { - "message": "Invalid end_date. end_date is before the start_date", - "status_code": 400 - } - - return int(repo_id), None, None, error - - else: - error = { - "message": "repo_id not specified. 
Use this endpoint to get a list of available repos: http:///api/unstable/repos", - "status_code": 400 - } - return None, None, None, error - -@app.route('/{}/pull_request_reports/average_commits_per_PR/'.format(AUGUR_API_VERSION), methods=["GET"]) -def average_commits_per_PR(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by = str(request.args.get('group_by', "month")) - return_json = request.args.get('return_json', "false") - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - y_axis = 'num_commits' - group_by_bars = 'merged_flag' - description = 'All' - - # gets pr_all data - # selects only need columns (pr_closed_needed_columns) - # removes columns that cannot be NULL (pr_closed_not_null_columns) - input_df = df_tuple[df_type["pr_all"]] - needed_columns = ['repo_id', 'repo_name', 'closed_year', 'closed_yearmonth', group_by_bars, 'commit_count'] - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - # print(input_df.to_string()) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - driver_df = input_df.copy() # deep copy input data so we do not change the external dataframe - - # Change closed year to int so that doesn't display as 2019.0 for example - driver_df['closed_year'] = driver_df['closed_year'].astype(int).astype(str) - - # defaults to year - x_axis = 'closed_year' - x_groups = sorted(list(driver_df[x_axis].unique())) - - if group_by == 'month': - x_axis = "closed_yearmonth" - x_groups = np.unique(np.datetime_as_string(input_df[x_axis], unit='M')) - - # inner groups on x_axis they are merged and not_merged - groups = list(driver_df[group_by_bars].unique()) - - # setup color pallete - try: - colors = mpl['Plasma'][len(groups)] - except: - colors = [mpl['Plasma'][3][0]] + [mpl['Plasma'][3][1]] - - merged_avg_values = list(driver_df.loc[driver_df[group_by_bars] == 'Merged / Accepted'].groupby([x_axis], - as_index=False).mean().round( - 1)['commit_count']) - not_merged_avg_values = list( - driver_df.loc[driver_df[group_by_bars] == 'Not Merged / Rejected'].groupby([x_axis], - as_index=False).mean().round(1)[ - 'commit_count']) - - # Setup data in format for grouped bar chart - data = { - 'years': x_groups, - 'Merged / Accepted': merged_avg_values, - 'Not Merged / Rejected': not_merged_avg_values, - } - - x = [(year, pr_state) for year in x_groups for pr_state in groups] - counts = sum(zip(data['Merged / Accepted'], data['Not Merged / Rejected']), ()) - - source = ColumnDataSource(data=dict(x=x, counts=counts)) - - title_beginning = '{}: '.format(repo_dict[repo_id]) - title = "{}Average Commit Counts Per Year for {} Pull Requests".format(title_beginning, description) - - plot_width = len(x_groups) * 300 - title_text_font_size = 16 - - if (len(title) * title_text_font_size / 2) > plot_width: - plot_width = int(len(title) * title_text_font_size / 2) + 40 - - p = figure(x_range=FactorRange(*x), plot_height=450, plot_width=plot_width, title=title, - y_range=(0, max(merged_avg_values + not_merged_avg_values) * 1.15), toolbar_location=None) - - # Vertical bar glyph - p.vbar(x='x', top='counts', width=0.9, source=source, 
line_color="white", - fill_color=factor_cmap('x', palette=colors, factors=groups, start=1, end=2)) - - # Data label - labels = LabelSet(x='x', y='counts', text='counts', # y_offset=-8, x_offset=34, - text_font_size="12pt", text_color="black", - source=source, text_align='center') - p.add_layout(labels) - - p.y_range.start = 0 - p.x_range.range_padding = 0.1 - p.xaxis.major_label_orientation = 1 - p.xgrid.grid_line_color = None - - p.yaxis.axis_label = 'Average Commits / Pull Request' - p.xaxis.axis_label = 'Year Closed' - - p.title.align = "center" - p.title.text_font_size = "{}px".format(title_text_font_size) - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "15px" - - p.yaxis.axis_label_text_font_size = "15px" - p.yaxis.major_label_text_font_size = "15px" - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average commits per pull requests over an entire year," \ - " for merged and not merged pull requests." - p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "average_commits_per_PR")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - # filename = export_png(grid, timeout=180, webdriver=webdriver) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/average_comments_per_PR/'.format(AUGUR_API_VERSION), methods=["GET"]) -def average_comments_per_PR(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - group_by = 'merged_flag' - x_axis = 'comment_count' - description = "All Closed" - y_axis = 'closed_year' - - # gets pr_closed data - # selects only need columns (pr_closed_needed_columns) - # removes columns that cannot be NULL (pr_closed_not_null_columns) - input_df = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', 'repo_name', y_axis, group_by, x_axis] - not_null_columns = needed_columns - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - driver_df = input_df.copy() - - try: - y_groups = sorted(list(driver_df[y_axis].unique())) - except: - y_groups = [repo_id] - - groups = driver_df[group_by].unique() - try: - colors = mpl['Plasma'][len(groups)] - except: - colors = [mpl['Plasma'][3][0]] + [mpl['Plasma'][3][1]] - - len_not_merged = len(driver_df.loc[driver_df['merged_flag'] == 'Not Merged / Rejected']) - len_merged = len(driver_df.loc[driver_df['merged_flag'] == 'Merged / Accepted']) - - title_beginning = '{}: '.format(repo_dict[repo_id]) - plot_width = 650 - p = figure(y_range=y_groups, plot_height=450, plot_width=plot_width, - # y_range=y_groups,#(pr_all[y_axis].min(),pr_all[y_axis].max()) 
#y_axis_type="datetime", - title='{} {}'.format(title_beginning, "Mean Comments for {} Pull Requests".format(description)), - toolbar_location=None) - - possible_maximums = [] - for y_value in y_groups: - - y_merged_data = driver_df.loc[ - (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Merged / Accepted')] - y_not_merged_data = driver_df.loc[ - (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Not Merged / Rejected')] - - if len(y_merged_data) > 0: - y_merged_data_mean = y_merged_data[x_axis].mean() - - if (math.isnan(y_merged_data_mean)): - return Response( - response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', status=200) - else: - y_merged_data[x_axis + '_mean'] = y_merged_data_mean.round(1) - - else: - y_merged_data[x_axis + '_mean'] = 0 - - if len(y_not_merged_data) > 0: - y_not_merged_data_mean = y_not_merged_data[x_axis].mean() - - if math.isnan(y_not_merged_data_mean): - return Response( - response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', status=200) - else: - y_not_merged_data[x_axis + '_mean'] = y_not_merged_data_mean.round(1) - - else: - y_not_merged_data[x_axis + '_mean'] = 0 - - not_merged_source = ColumnDataSource(y_not_merged_data) - merged_source = ColumnDataSource(y_merged_data) - - possible_maximums.append(max(y_not_merged_data[x_axis + '_mean'])) - possible_maximums.append(max(y_merged_data[x_axis + '_mean'])) - - # mean comment count for merged - merged_comment_count_glyph = p.hbar(y=dodge(y_axis, -0.1, range=p.y_range), left=0, right=x_axis + '_mean', - height=0.04 * len(driver_df[y_axis].unique()), - source=merged_source, - fill_color="black") # ,legend_label="Mean Days to Close", - # Data label - labels = LabelSet(x=x_axis + '_mean', y=dodge(y_axis, -0.1, range=p.y_range), text=x_axis + '_mean', - y_offset=-8, x_offset=34, - text_font_size="12pt", text_color="black", - source=merged_source, text_align='center') - p.add_layout(labels) - # mean comment count For nonmerged - not_merged_comment_count_glyph = p.hbar(y=dodge(y_axis, 0.1, range=p.y_range), left=0, - right=x_axis + '_mean', - height=0.04 * len(driver_df[y_axis].unique()), - source=not_merged_source, - fill_color="#e84d60") # legend_label="Mean Days to Close", - # Data label - labels = LabelSet(x=x_axis + '_mean', y=dodge(y_axis, 0.1, range=p.y_range), text=x_axis + '_mean', - y_offset=-8, x_offset=34, - text_font_size="12pt", text_color="#e84d60", - source=not_merged_source, text_align='center') - p.add_layout(labels) - - # p.y_range.range_padding = 0.1 - p.ygrid.grid_line_color = None - p.legend.location = "bottom_right" - p.axis.minor_tick_line_color = None - p.outline_line_color = None - p.xaxis.axis_label = 'Average Comments / Pull Request' - p.yaxis.axis_label = 'Repository' if y_axis == 'repo_name' else 'Year Closed' if y_axis == 'closed_year' else '' - - legend = Legend( - items=[ - ("Merged Pull Request Mean Comment Count", [merged_comment_count_glyph]), - ("Rejected Pull Request Mean Comment Count", [not_merged_comment_count_glyph]) - ], - - location='center', - orientation='vertical', - border_line_color="black" - ) - p.add_layout(legend, "below") - - p.title.text_font_size = "16px" - p.title.align = "center" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - - p.x_range = Range1d(0, max(possible_maximums) 
* 1.15) - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average number of comments per merged or not merged pull request." - - p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "average_comments_per_PR")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/PR_counts_by_merged_status/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def PR_counts_by_merged_status(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - - x_axis = 'closed_year' - description = 'All Closed' - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - # gets pr_closed data - # selects only need columns (pr_closed_needed_columns) - # removes columns that cannot be NULL (pr_closed_not_null_columns) - pr_closed = df_tuple[df_type["pr_closed"]] - pr_closed_needed_columns = ['repo_id', 'repo_name', x_axis, 'merged_flag'] - pr_closed = filter_data(pr_closed, pr_closed_needed_columns) - - # gets pr_slow20_not_merged data - # selects only need columns (pr_slow20_not_merged_needed_columns) - # removes columns that cannot be NULL (pr_slow20_not_merged_not_null_columns) - pr_slow20_not_merged = df_tuple[df_type["pr_slow20_not_merged"]] - pr_slow20_not_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'merged_flag'] - pr_slow20_not_merged = filter_data(pr_slow20_not_merged, pr_slow20_not_merged_needed_columns,) - - # gets pr_slow20_merged data - # selects only need columns (pr_slow20_not_merged_needed_columns) - # removes columns that cannot be NULL (pr_slow20_not_merged_not_null_columns) - pr_slow20_merged = df_tuple[df_type["pr_slow20_merged"]] - pr_slow20_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'merged_flag'] - pr_slow20_merged = filter_data(pr_slow20_merged, pr_slow20_merged_needed_columns) - - if len(pr_closed) == 0 or len(pr_slow20_not_merged) == 0 or len(pr_slow20_merged) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']} - - data_dict = {'All': pr_closed, 'Slowest 20%': pr_slow20_not_merged.append(pr_slow20_merged, ignore_index=True)} - - colors = mpl['Plasma'][6] - - for data_desc, input_df in data_dict.items(): - x_groups = sorted(list(input_df[x_axis].astype(str).unique())) - break - - plot_width = 315 * len(x_groups) - - if plot_width < 900: - plot_width = 900 - title_beginning = repo_dict[repo_id] - p = figure(x_range=x_groups, plot_height=350, plot_width=plot_width, - title='{}: {}'.format(title_beginning, - "Count of {} Pull Requests by Merged Status".format(description)), - toolbar_location=None) - - dodge_amount = 0.12 - color_index = 0 - x_offset = 60 - - all_totals = [] - for data_desc, input_df in data_dict.items(): - 
driver_df = input_df.copy() - - driver_df[x_axis] = driver_df[x_axis].astype(str) - - groups = sorted(list(driver_df['merged_flag'].unique())) - - driver_df = driver_df.loc[driver_df['repo_id'] == repo_id] - - len_merged = [] - zeros = [] - len_not_merged = [] - totals = [] - - for x_group in x_groups: - len_merged_entry = len( - driver_df.loc[(driver_df['merged_flag'] == 'Merged / Accepted') & (driver_df[x_axis] == x_group)]) - totals += [len(driver_df.loc[(driver_df['merged_flag'] == 'Not Merged / Rejected') & ( - driver_df[x_axis] == x_group)]) + len_merged_entry] - len_not_merged += [len(driver_df.loc[(driver_df['merged_flag'] == 'Not Merged / Rejected') & ( - driver_df[x_axis] == x_group)])] - len_merged += [len_merged_entry] - zeros.append(0) - - data = {'X': x_groups} - for group in groups: - data[group] = [] - for x_group in x_groups: - data[group] += [ - len(driver_df.loc[(driver_df['merged_flag'] == group) & (driver_df[x_axis] == x_group)])] - - data['len_merged'] = len_merged - data['len_not_merged'] = len_not_merged - data['totals'] = totals - data['zeros'] = zeros - - if data_desc == "All": - all_totals = totals - - source = ColumnDataSource(data) - - stacked_bar = p.vbar_stack(groups, x=dodge('X', dodge_amount, range=p.x_range), width=0.2, source=source, - color=colors[1:3], legend_label=[f"{data_desc} " + "%s" % x for x in groups]) - # Data label for merged - - p.add_layout( - LabelSet(x=dodge('X', dodge_amount, range=p.x_range), y='zeros', text='len_merged', y_offset=2, - x_offset=x_offset, - text_font_size="12pt", text_color=colors[1:3][0], - source=source, text_align='center') - ) - if min(data['totals']) < 400: - y_offset = 15 - else: - y_offset = 0 - # Data label for not merged - p.add_layout( - LabelSet(x=dodge('X', dodge_amount, range=p.x_range), y='totals', text='len_not_merged', - y_offset=y_offset, x_offset=x_offset, - text_font_size="12pt", text_color=colors[1:3][1], - source=source, text_align='center') - ) - # Data label for total - p.add_layout( - LabelSet(x=dodge('X', dodge_amount, range=p.x_range), y='totals', text='totals', y_offset=0, x_offset=0, - text_font_size="12pt", text_color='black', - source=source, text_align='center') - ) - dodge_amount *= -1 - colors = colors[::-1] - x_offset *= -1 - - p.y_range = Range1d(0, max(all_totals) * 1.4) - - p.xgrid.grid_line_color = None - p.legend.location = "top_center" - p.legend.orientation = "horizontal" - p.axis.minor_tick_line_color = None - p.outline_line_color = None - p.yaxis.axis_label = 'Count of Pull Requests' - p.xaxis.axis_label = 'Repository' if x_axis == 'repo_name' else 'Year Closed' if x_axis == 'closed_year' else '' - - p.title.align = "center" - p.title.text_font_size = "16px" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - - p.outline_line_color = None - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the number of closed pull requests per year in " \ - "four different categories. These four categories are All Merged, All Not Merged," \ - " Slowest 20% Merged, and Slowest 20% Not Merged." 
- p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "PR_counts_by_merged_status")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/mean_response_times_for_PR/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def mean_response_times_for_PR(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - time_unit = 'days' - x_max = 95 - y_axis = 'closed_year' - description = "All Closed" - legend_position = (410, 10) - - # gets pr_closed data - # selects only need columns (pr_closed_needed_columns) - # removes columns that cannot be NULL (pr_closed_not_null_columns) - input_df = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', 'repo_name', y_axis, 'merged_flag', time_unit + '_to_first_response', - time_unit + '_to_last_response', time_unit + '_to_close'] - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - driver_df = input_df.copy() # deep copy input data so we do not alter the external dataframe - - title_beginning = '{}: '.format(repo_dict[repo_id]) - plot_width = 950 - p = figure(toolbar_location=None, y_range=sorted(driver_df[y_axis].unique()), plot_width=plot_width, - plot_height=450, # 75*len(driver_df[y_axis].unique()), - title="{}Mean Response Times for Pull Requests {}".format(title_beginning, description)) - - first_response_glyphs = [] - last_response_glyphs = [] - merged_days_to_close_glyphs = [] - not_merged_days_to_close_glyphs = [] - - possible_maximums = [] - - # FIXME repo_set is not defined - # setup color pallete - try: - colors = Colorblind[len(repo_set)] - except: - colors = Colorblind[3] - - y_merged_data_list = [] - y_not_merged_data_list = [] - - # calculate data frist time to obtain the maximum and make sure there is message data - for y_value in driver_df[y_axis].unique(): - - y_merged_data = driver_df.loc[ - (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Merged / Accepted')] - y_not_merged_data = driver_df.loc[ - (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Not Merged / Rejected')] - - if len(y_merged_data) > 0: - - y_merged_data_first_response_mean = y_merged_data[time_unit + '_to_first_response'].mean() - y_merged_data_last_response_mean = y_merged_data[time_unit + '_to_last_response'].mean() - y_merged_data_to_close_mean = y_merged_data[time_unit + '_to_close'].mean() - - if (math.isnan(y_merged_data_first_response_mean) or math.isnan( - y_merged_data_last_response_mean) or math.isnan(y_merged_data_to_close_mean)): - return Response( - response="There is no message data for this repo, in the 
database you are accessing", - mimetype='application/json', status=200) - else: - y_merged_data[time_unit + '_to_first_response_mean'] = y_merged_data_first_response_mean.round(1) - y_merged_data[time_unit + '_to_last_response_mean'] = y_merged_data_last_response_mean.round(1) - y_merged_data[time_unit + '_to_close_mean'] = y_merged_data_to_close_mean.round(1) - else: - y_merged_data[time_unit + '_to_first_response_mean'] = 0.00 - y_merged_data[time_unit + '_to_last_response_mean'] = 0.00 - y_merged_data[time_unit + '_to_close_mean'] = 0.00 - - if len(y_not_merged_data) > 0: - - y_not_merged_data_first_response_mean = y_not_merged_data[time_unit + '_to_first_response'].mean() - y_not_merged_data_last_response_mean = y_not_merged_data[time_unit + '_to_last_response'].mean() - y_not_merged_data_to_close_mean = y_not_merged_data[time_unit + '_to_close'].mean() - - if (math.isnan(y_not_merged_data_first_response_mean) or math.isnan( - y_not_merged_data_last_response_mean) or math.isnan(y_not_merged_data_to_close_mean)): - return Response( - response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', status=200) - else: - y_not_merged_data[ - time_unit + '_to_first_response_mean'] = y_not_merged_data_first_response_mean.round(1) - y_not_merged_data[ - time_unit + '_to_last_response_mean'] = y_not_merged_data_last_response_mean.round(1) - y_not_merged_data[time_unit + '_to_close_mean'] = y_not_merged_data_to_close_mean.round(1) - else: - y_not_merged_data[time_unit + '_to_first_response_mean'] = 0.00 - y_not_merged_data[time_unit + '_to_last_response_mean'] = 0.00 - y_not_merged_data[time_unit + '_to_close_mean'] = 0.00 - - possible_maximums.append(max(y_merged_data[time_unit + '_to_close_mean'])) - possible_maximums.append(max(y_not_merged_data[time_unit + '_to_close_mean'])) - - maximum = max(possible_maximums) * 1.15 - ideal_difference = maximum * 0.064 - - y_merged_data_list.append(y_merged_data) - y_not_merged_data_list.append(y_not_merged_data) - - # loop through data and add it to the plot - for index in range(0, len(y_merged_data_list)): - - y_merged_data = y_merged_data_list[index] - y_not_merged_data = y_not_merged_data_list[index] - - not_merged_source = ColumnDataSource(y_not_merged_data) - merged_source = ColumnDataSource(y_merged_data) - - # mean PR length for merged - merged_days_to_close_glyph = p.hbar(y=dodge(y_axis, -0.1, range=p.y_range), left=0, - right=time_unit + '_to_close_mean', - height=0.04 * len(driver_df[y_axis].unique()), - source=merged_source, - fill_color="black") # ,legend_label="Mean Days to Close", - merged_days_to_close_glyphs.append(merged_days_to_close_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_close_mean', y=dodge(y_axis, -0.1, range=p.y_range), - text=time_unit + '_to_close_mean', y_offset=-8, x_offset=34, # 34 - text_font_size="12pt", text_color="black", - source=merged_source, text_align='center') - p.add_layout(labels) - - # mean PR length For nonmerged - not_merged_days_to_close_glyph = p.hbar(y=dodge(y_axis, 0.1, range=p.y_range), left=0, - right=time_unit + '_to_close_mean', - height=0.04 * len(driver_df[y_axis].unique()), - source=not_merged_source, - fill_color="#e84d60") # legend_label="Mean Days to Close", - not_merged_days_to_close_glyphs.append(not_merged_days_to_close_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_close_mean', y=dodge(y_axis, 0.1, range=p.y_range), - text=time_unit + '_to_close_mean', y_offset=-8, x_offset=44, - text_font_size="12pt", 
text_color="#e84d60", - source=not_merged_source, text_align='center') - p.add_layout(labels) - - # if the difference between two values is less than 6.4 percent move the second one to the right 30 pixels - if (max(y_merged_data[time_unit + '_to_last_response_mean']) - max( - y_merged_data[time_unit + '_to_first_response_mean'])) < ideal_difference: - merged_x_offset = 30 - else: - merged_x_offset = 0 - - # if the difference between two values is less than 6.4 percent move the second one to the right 30 pixels - if (max(y_not_merged_data[time_unit + '_to_last_response_mean']) - max( - y_not_merged_data[time_unit + '_to_first_response_mean'])) < ideal_difference: - not_merged_x_offset = 30 - else: - not_merged_x_offset = 0 - - # if there is only one bar set the y_offsets so the labels will not overlap the bars - if len(driver_df[y_axis].unique()) == 1: - merged_y_offset = -65 - not_merged_y_offset = 45 - else: - merged_y_offset = -45 - not_merged_y_offset = 25 - - # mean time to first response - glyph = Rect(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, -0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[0]) - first_response_glyph = p.add_glyph(merged_source, glyph) - first_response_glyphs.append(first_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_first_response_mean', x_offset=0, y_offset=merged_y_offset, # -60, - text_font_size="12pt", text_color=colors[0], - source=merged_source, text_align='center') - p.add_layout(labels) - - # for nonmerged - glyph = Rect(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, 0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[0]) - first_response_glyph = p.add_glyph(not_merged_source, glyph) - first_response_glyphs.append(first_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_first_response_mean', x_offset=0, y_offset=not_merged_y_offset, - # 40, - text_font_size="12pt", text_color=colors[0], - source=not_merged_source, text_align='center') - p.add_layout(labels) - - # mean time to last response - glyph = Rect(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, -0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[1]) - last_response_glyph = p.add_glyph(merged_source, glyph) - last_response_glyphs.append(last_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_last_response_mean', x_offset=merged_x_offset, - y_offset=merged_y_offset, # -60, - text_font_size="12pt", text_color=colors[1], - source=merged_source, text_align='center') - p.add_layout(labels) - - # for nonmerged - glyph = Rect(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, 0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[1]) - last_response_glyph = p.add_glyph(not_merged_source, glyph) - last_response_glyphs.append(last_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_last_response_mean', x_offset=not_merged_x_offset, - y_offset=not_merged_y_offset, # 40, - text_font_size="12pt", text_color=colors[1], - source=not_merged_source, 
text_align='center') - p.add_layout(labels) - - p.title.align = "center" - p.title.text_font_size = "16px" - - p.xaxis.axis_label = "Days to Close" - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - # adjust the starting point and ending point based on the maximum of maximum of the graph - p.x_range = Range1d(maximum / 30 * -1, maximum * 1.15) - - p.yaxis.axis_label = "Repository" if y_axis == 'repo_name' else 'Year Closed' if y_axis == 'closed_year' else '' - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - p.ygrid.grid_line_color = None - p.y_range.range_padding = 0.15 - - p.outline_line_color = None - p.toolbar.logo = None - p.toolbar_location = None - - def add_legend(location, orientation, side): - legend = Legend( - items=[ - ("Mean Days to First Response", first_response_glyphs), - ("Mean Days to Last Response", last_response_glyphs), - ("Merged Mean Days to Close", merged_days_to_close_glyphs), - ("Not Merged Mean Days to Close", not_merged_days_to_close_glyphs) - ], - - location=location, - orientation=orientation, - border_line_color="black" - # title='Example Title' - ) - p.add_layout(legend, side) - - # add_legend((150, 50), "horizontal", "center") - add_legend((10, 135), "vertical", "right") - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average number of days between comments for all closed pull requests per month " \ - "in four categories. These four categories are All Merged, All Not Merged, Slowest 20% Merged, " \ - "and Slowest 20% Not Merged." - p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "mean_response_times_for_PR")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/mean_days_between_PR_comments/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def mean_days_between_PR_comments(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - - time_unit = 'Days' - x_axis = 'closed_yearmonth' - y_axis = 'average_days_between_responses' - description = "All Closed" - line_group = 'merged_flag' - num_outliers_repo_map = {} - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - # gets pr_closed data - # selects only need columns (pr_closed_needed_columns) - # removes columns that cannot be NULL (pr_closed_not_null_columns) - pr_closed = df_tuple[df_type["pr_closed"]] - pr_closed_needed_columns = ['repo_id', 'repo_name', x_axis, 'average_time_between_responses', line_group] - pr_closed = filter_data(pr_closed, pr_closed_needed_columns) - - # gets pr_slow20_not_merged data - # selects only need columns (pr_slow20_not_merged_needed_columns) - # removes columns that cannot be NULL (pr_slow20_not_merged_not_null_columns) - pr_slow20_not_merged = df_tuple[df_type["pr_slow20_not_merged"]] - 
pr_slow20_not_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'average_time_between_responses', line_group] - pr_slow20_not_merged = filter_data(pr_slow20_not_merged, pr_slow20_not_merged_needed_columns) - - # gets pr_slow20_merged data - # selects only need columns (pr_slow20_not_merged_needed_columns) - # removes columns that cannot be NULL (pr_slow20_not_merged_not_null_columns) - pr_slow20_merged = df_tuple[df_type["pr_slow20_merged"]] - pr_slow20_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'average_time_between_responses', line_group] - pr_slow20_merged = filter_data(pr_slow20_merged, pr_slow20_merged_needed_columns) - - if len(pr_closed) == 0 or len(pr_slow20_not_merged) == 0 or len(pr_slow20_merged) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - try: - pr_closed['average_days_between_responses'] = pr_closed['average_time_between_responses'].map( - lambda x: x.days).astype(float) - pr_slow20_not_merged['average_days_between_responses'] = pr_slow20_not_merged[ - 'average_time_between_responses'].map(lambda x: x.days).astype(float) - pr_slow20_merged['average_days_between_responses'] = pr_slow20_merged['average_time_between_responses'].map( - lambda x: x.days).astype(float) - except: - return Response(response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']} - - data_dict = {'All': pr_closed, 'Slowest 20%': pr_slow20_not_merged.append(pr_slow20_merged, ignore_index=True)} - - plot_width = 950 - p1 = figure(x_axis_type="datetime", - title="{}: Mean {} Between Comments by Month Closed for {} Pull Requests".format(repo_dict[repo_id], time_unit, description), - plot_width=plot_width, x_range=(data_dict["All"][x_axis].min(), data_dict["All"][x_axis].max()), plot_height=500, - toolbar_location=None) - colors = Category20[10][6:] - color_index = 0 - - glyphs = [] - - possible_maximums = [] - for data_desc, input_df in data_dict.items(): - - driver_df = input_df.copy() - - driver_df = remove_outliers(driver_df, y_axis, num_outliers_repo_map) - - driver_df = driver_df.loc[driver_df['repo_id'] == repo_id] - index = 0 - - driver_df_mean = driver_df.groupby(['repo_id', line_group, x_axis], as_index=False).mean() - - title_ending = '' - if repo_id: - title_ending += ' for Repo: {}'.format(repo_id) - - for group_num, line_group_value in enumerate(driver_df[line_group].unique(), color_index): - glyphs.append(p1.line(driver_df_mean.loc[driver_df_mean[line_group] == line_group_value][x_axis], - driver_df_mean.loc[driver_df_mean[line_group] == line_group_value][y_axis], - color=colors[group_num], line_width=3)) - color_index += 1 - possible_maximums.append( - max(driver_df_mean.loc[driver_df_mean[line_group] == line_group_value][y_axis].dropna())) - for repo, num_outliers in num_outliers_repo_map.items(): - p1.add_layout( - Title(text="** {} outliers for {} were removed".format(num_outliers, repo), align="center"), - "below") - - p1.grid.grid_line_alpha = 0.3 - p1.xaxis.axis_label = 'Month Closed' - p1.xaxis.ticker.desired_num_ticks = 15 - p1.yaxis.axis_label = 'Mean {} Between Responses'.format(time_unit) - p1.legend.location = "top_left" - - legend = Legend( - items=[ - ("All Not Merged / Rejected", [glyphs[0]]), - ("All Merged / Accepted", [glyphs[1]]), - ("Slowest 20% Not Merged / Rejected", [glyphs[2]]), - 
("Slowest 20% Merged / Accepted", [glyphs[3]]) - ], - - location='center_right', - orientation='vertical', - border_line_color="black" - ) - - p1.add_layout(legend, 'right') - - p1.title.text_font_size = "16px" - - p1.xaxis.axis_label_text_font_size = "16px" - p1.xaxis.major_label_text_font_size = "16px" - - p1.yaxis.axis_label_text_font_size = "16px" - p1.yaxis.major_label_text_font_size = "16px" - p1.xaxis.major_label_orientation = 45.0 - - p1.y_range = Range1d(0, max(possible_maximums) * 1.15) - - plot = p1 - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average number of days between comments for all" \ - " closed pull requests per month in four categories. These four categories" \ - " are All Merged, All Not Merged, Slowest 20% Merged, and Slowest 20% Not Merged." - p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "mean_days_between_PR_comments")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/PR_time_to_first_response/'.format(AUGUR_API_VERSION), methods=["GET"]) -def PR_time_to_first_response(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - remove_outliers = str(request.args.get('remove_outliers', "true")) - - x_axis = 'pr_closed_at' - y_axis = 'days_to_first_response' - description = 'All' - group_by = 'merged_flag' - legend_position = 'top_right' - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - pr_closed = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', 'repo_name', x_axis, group_by, y_axis] - pr_closed = filter_data(pr_closed, needed_columns) - - if len(pr_closed) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']} - - driver_df = pr_closed.copy() - - outliers_removed = 0 - - if remove_outliers == "true": - driver_df, outliers_removed, outlier_cutoff = remove_outliers_by_standard_deviation(driver_df, 'days_to_first_response') - - group_by_groups = sorted(driver_df[group_by].unique()) - - # setup color pallete - try: - # FIXME repo_set is not defined - colors = Colorblind[len(repo_set)] - except: - colors = Colorblind[3] - - title_beginning = '{}: '.format(repo_dict[repo_id]) - plot_width = 180 * 5 - p = figure(x_range=( - driver_df[x_axis].min() - datetime.timedelta(days=30), driver_df[x_axis].max() + datetime.timedelta(days=25)), - # (driver_df[y_axis].min(), driver_df[y_axis].max()), - toolbar_location=None, - title='{}Days to First Response for {} Closed Pull Requests'.format(title_beginning, description), - plot_width=plot_width, - plot_height=400, x_axis_type='datetime') - - for index, group_by_group in enumerate(group_by_groups): - p.scatter(x_axis, y_axis, color=colors[index], 
marker="square", - source=driver_df.loc[driver_df[group_by] == group_by_group], legend_label=group_by_group) - - if group_by_group == "Merged / Accepted": - merged_values = driver_df.loc[driver_df[group_by] == group_by_group][y_axis].dropna().values.tolist() - else: - not_merged_values = driver_df.loc[driver_df[group_by] == group_by_group][ - y_axis].dropna().values.tolist() - - values = not_merged_values + merged_values - - if outliers_removed > 0: - if repo_id: - p.add_layout(Title( - text="** Outliers cut off at {} days: {} outlier(s) for {} were removed **".format(outlier_cutoff, - outliers_removed, - repo_dict[ - repo_id]), - align="center"), "below") - else: - p.add_layout(Title( - text="** Outliers cut off at {} days: {} outlier(s) were removed **".format(outlier_cutoff, - outliers_removed), - align="center"), "below") - - p.xaxis.axis_label = 'Date Closed' if x_axis == 'pr_closed_at' else 'Date Created' if x_axis == 'pr_created_at' else 'Date' - p.yaxis.axis_label = 'Days to First Response' - p.legend.location = legend_position - - p.title.align = "center" - p.title.text_font_size = "16px" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - - if len(values) == 0: - return Response(response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - # determine y_max by finding the max of the values and scaling it up a small amount - y_max = max(values) * 1.015 - - p.y_range = Range1d(0, y_max) - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the days to first response for individual pull requests, either Merged or Not Merged." 
- p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "PR_time_to_first_response")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/average_PR_events_for_closed_PRs/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def average_PR_events_for_closed_PRs(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - include_comments = str(request.args.get('include_comments', True)) - - x_axis = 'closed_year' - facet = 'merged_flag' - columns = 2 - x_max = 1100 - y_axis = 'repo_name' - description = 'All Closed' - optional_comments = ['comment_count'] if include_comments else [] - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - pr_closed = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', 'repo_name', x_axis, 'assigned_count', - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'mentioned_count', - 'referenced_count', - 'closed_count', - 'head_ref_force_pushed_count', - 'merged_count', - 'milestoned_count', - 'unlabeled_count', - 'head_ref_deleted_count', facet] + optional_comments - pr_closed = filter_data(pr_closed, needed_columns) - - if len(pr_closed) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']} - - colors = linear_gradient('#f5f5dc', '#fff44f', 150)['hex'] - - driver_df = pr_closed.copy() - driver_df[x_axis] = driver_df[x_axis].astype(str) - - if facet == 'closed_year' or y_axis == 'closed_year': - driver_df['closed_year'] = driver_df['closed_year'].astype(int).astype(str) - - y_groups = [ - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'referenced_count', - 'closed_count', - # 'milestoned_count', - ] + optional_comments - - optional_group_comments = ['comment'] if include_comments else [] - # y_groups = ['subscribed', 'mentioned', 'labeled', 'review_requested', 'head_ref_force_pushed', - # 'referenced', 'closed', 'merged', 'unlabeled', 'head_ref_deleted', 'milestoned', 'assigned'] - # + optional_group_comments - - x_groups = sorted(list(driver_df[x_axis].unique())) - - grid_array = [] - grid_row = [] - - for index, facet_group in enumerate(sorted(driver_df[facet].unique())): - - facet_data = driver_df.loc[driver_df[facet] == facet_group] - # display(facet_data.sort_values('merged_count', ascending=False).head(50)) - driver_df_mean = facet_data.groupby(['repo_id', 'repo_name', x_axis], as_index=False).mean().round(1) - - # if a record is field in a record is Nan then it is not counted by count() so when it is not - # 2 meaning both rows have a value, there is not enough data - if (driver_df_mean['assigned_count'].count() != 2 or driver_df_mean[ - 'review_requested_count'].count() != 2 or driver_df_mean['labeled_count'].count() 
!= 2 or - driver_df_mean['subscribed_count'].count() != 2 or driver_df_mean['mentioned_count'].count() != 2 or - driver_df_mean['referenced_count'].count() != 2 or - driver_df_mean['closed_count'].count() != 2 or driver_df_mean[ - 'head_ref_force_pushed_count'].count() != 2 or driver_df_mean['merged_count'].count() != 2 or - driver_df_mean['milestoned_count'].count() != 2 or driver_df_mean['unlabeled_count'].count() != 2 or - driver_df_mean['head_ref_deleted_count'].count() != 2 or - driver_df_mean['comment_count'].count() != 2): - return Response(response="There is not enough data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - # print(driver_df_mean.to_string()) - # data = {'Y' : y_groups} - # for group in y_groups: - # data[group] = driver_df_mean[group].tolist() - plot_width = 700 - p = figure(y_range=y_groups, plot_height=500, plot_width=plot_width, x_range=x_groups, - title='{}'.format(format(facet_group))) - - for y_group in y_groups: - driver_df_mean['field'] = y_group - source = ColumnDataSource(driver_df_mean) - mapper = LinearColorMapper(palette=colors, low=driver_df_mean[y_group].min(), - high=driver_df_mean[y_group].max()) - - p.rect(y='field', x=x_axis, width=1, height=1, source=source, - line_color=None, fill_color=transform(y_group, mapper)) - # Data label - labels = LabelSet(x=x_axis, y='field', text=y_group, y_offset=-8, - text_font_size="12pt", text_color='black', - source=source, text_align='center') - p.add_layout(labels) - - color_bar = ColorBar(color_mapper=mapper, location=(0, 0), - ticker=BasicTicker(desired_num_ticks=9), - formatter=PrintfTickFormatter(format="%d")) - # p.add_layout(color_bar, 'right') - - p.y_range.range_padding = 0.1 - p.ygrid.grid_line_color = None - - p.legend.location = "bottom_right" - p.axis.minor_tick_line_color = None - p.outline_line_color = None - - p.xaxis.axis_label = 'Year Closed' - p.yaxis.axis_label = 'Event Type' - - p.title.align = "center" - p.title.text_font_size = "15px" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - - grid_row.append(p) - if index % columns == columns - 1: - grid_array.append(grid_row) - grid_row = [] - grid = gridplot(grid_array) - - # create caption plot - caption_plot = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average count of several different event types for " \ - "closed pull requests per year. It spilits the pull requests into two categories, " \ - "Merged / Accepted, and Not Merged / Rejected, so the similarities and differences are clear." 
- - caption_plot.add_layout(Label(x=0, y=380, x_units='screen', y_units='screen', text='{}'.format(caption), - text_font='times', text_font_size='15pt', render_mode='css')) - - # caption_plot.outline_line_color = None - caption_plot.toolbar_location = None - - # create title plot - title_plot = figure(width=plot_width, height=50, margin=(0, 0, 0, 0)) - title = '{}: Average Pull Request Event Types for {} Pull Requests'.format(repo_dict[repo_id], description) - - title_plot.add_layout(Label(x=550, y=0, x_units='screen', y_units='screen', text='{}'.format(title), - text_font='times', text_font_size='17px', - text_font_style='bold', render_mode='css')) - - # title_plot.outline_line_color = None - title_plot.toolbar_location = None - - layout = column([title_plot, grid, caption_plot], sizing_mode='scale_width') - - if return_json == "true": - var = Response(response=json.dumps(json_item(layout, "average_PR_events_for_closed_PRs")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(layout, timeout=181) # , webdriver=selenium.webdriver.firefox.webdriver) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/Average_PR_duration/'.format(AUGUR_API_VERSION), methods=["GET"]) -def Average_PR_duration(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by = str(request.args.get('group_by', "month")) - return_json = request.args.get('return_json', "false") - remove_outliers = str(request.args.get('remove_outliers', "true")) - - x_axis = 'repo_name' - group_by = 'merged_flag' - y_axis = 'closed_yearmonth' - description = "All Closed" - heat_field = 'pr_duration_days' - columns = 2 - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - pr_closed = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', y_axis, group_by, x_axis, 'pr_closed_at', 'pr_created_at'] - pr_closed = filter_data(pr_closed, needed_columns) - - if len(pr_closed) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - pr_duration_frame = pr_closed.assign(pr_duration=(pr_closed['pr_closed_at'] - pr_closed['pr_created_at'])) - pr_duration_frame = pr_duration_frame.assign( - pr_duration_days=(pr_duration_frame['pr_duration'] / datetime.timedelta(minutes=1)) / 60 / 24) - - repo_dict = {repo_id: pr_duration_frame.loc[pr_duration_frame['repo_id'] == repo_id].iloc[0]['repo_name']} - - red_green_gradient = linear_gradient('#0080FF', '#DC143C', 150)['hex'] # 32CD32 - - driver_df = pr_duration_frame.copy() - - driver_df[y_axis] = driver_df[y_axis].astype(str) - - # add new group by + xaxis column - driver_df['grouped_x'] = driver_df[x_axis] + ' - ' + driver_df[group_by] - - driver_df_mean = driver_df.groupby(['grouped_x', y_axis], as_index=False).mean() - - colors = red_green_gradient - y_groups = driver_df_mean[y_axis].unique() - x_groups = sorted(driver_df[x_axis].unique()) - grouped_x_groups = sorted(driver_df_mean['grouped_x'].unique()) - - # defualt outliers removed to 0 - outliers_removed = 0 - - if remove_outliers == "true": - driver_df_mean, outliers_removed, 
outlier_cutoff = remove_outliers_by_standard_deviation(driver_df_mean, - heat_field) - - values = driver_df_mean[heat_field].values.tolist() - - heat_max = max(values) * 1.02 - - mapper = LinearColorMapper(palette=colors, low=driver_df_mean[heat_field].min(), - high=heat_max) # driver_df_mean[heat_field].max()) - - source = ColumnDataSource(driver_df_mean) - title_beginning = repo_dict[repo_id] + ':' - plot_width = 1100 - p = figure(plot_width=plot_width, plot_height=300, - title="{} Mean Duration (Days) {} Pull Requests".format(title_beginning, description), - y_range=grouped_x_groups[::-1], x_range=y_groups, - toolbar_location=None, tools="") # , x_axis_location="above") - - for x_group in x_groups: - outliers = driver_df_mean.loc[ - (driver_df_mean[heat_field] > heat_max) & (driver_df_mean['grouped_x'].str.contains(x_group))] - - if outliers_removed > 0: - p.add_layout(Title( - text="** Outliers capped at {} days: {} outlier(s) for {} were capped at {} **".format( - outlier_cutoff, outliers_removed, x_group, outlier_cutoff), align="center"), "below") - - p.rect(x=y_axis, y='grouped_x', width=1, height=1, source=source, - line_color=None, fill_color=transform(heat_field, mapper)) - - color_bar = ColorBar(color_mapper=mapper, location=(0, 0), - ticker=BasicTicker(desired_num_ticks=9), - formatter=PrintfTickFormatter(format="%d")) - - p.add_layout(color_bar, 'right') - - p.title.align = "center" - p.title.text_font_size = "16px" - - p.axis.axis_line_color = None - p.axis.major_tick_line_color = None - p.axis.major_label_text_font_size = "11pt" - p.axis.major_label_standoff = 0 - p.xaxis.major_label_orientation = 1.0 - p.xaxis.axis_label = 'Month Closed' if y_axis[0:6] == 'closed' else 'Date Created' if y_axis[ - 0:7] == 'created' else 'Repository' if y_axis == 'repo_name' else '' - # p.yaxis.axis_label = 'Merged Status' - - p.title.text_font_size = "16px" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "14px" - - p.yaxis.major_label_text_font_size = "15px" - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average duration of all closed pull requests. " \ - "Red represents a slow response relative to the others, while blue a light blue " \ - "represents a fast response relative to the others. Blank cells represents months " \ - "without pull requests." 
- p = add_caption_to_plot(p, caption) - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "Average_PR_duration")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - # newt = get_screenshot_as_png(grid, timeout=180, webdriver=selenium.webdriver.firefox.webdriver) - # filename = export_png(grid, timeout=180, webdriver=selenium.webdriver.firefox.webdriver) - filename = export_png(grid, timeout=180) - - # return sendfile(newt) - return send_file(filename) diff --git a/augur/api/view/api.py b/augur/api/view/api.py index 21d182024f..eee99c93c4 100644 --- a/augur/api/view/api.py +++ b/augur/api/view/api.py @@ -1,11 +1,20 @@ -from flask import request, jsonify, redirect, url_for, flash, current_app +import logging import re + +from flask import flash, current_app, jsonify, redirect, request, url_for from flask_login import current_user, login_required + from augur.application.db.models import Repo, RepoGroup, UserGroup, UserRepo -from augur.tasks.frontend import add_github_orgs_and_repos, parse_org_and_repo_name, parse_org_name, add_gitlab_repos -from .utils import * -from ..server import app from augur.application.db.session import DatabaseSession +from augur.tasks.frontend import ( + add_github_orgs_and_repos, + add_gitlab_repos, + parse_org_and_repo_name, + parse_org_name +) + +from ..server import app +from .utils import * @app.route('/cache/file/') @app.route('/cache/file/') @@ -155,21 +164,37 @@ def user_remove_repo(): group = request.args.get("group_name") repo = request.args.get("repo_id") - if not repo: - flash("No repo id provided") - if not group: - flash("No group name provided") - - repo = int(repo) + + if not repo or not group: + if not repo: + flash("No repo id provided") + if not group: + flash("No group name provided") + # Staying on same page instead of redirecting to settings + return redirect(url_for("user_group_view", group=group)) + + try: + repo_id = int(repo) + except (TypeError, ValueError) as e: + flash("Invalid repo id provided") + + logging.error(f"Invalid repo id provided for repo '{repo}'. 
Error: {e}") + + + return redirect(url_for("user_group_view", group=group)) - result = current_user.remove_repo(group, repo)[0] + result = current_user.remove_repo(group, repo_id)[0] if result: flash(f"Successfully removed repo {repo} from group {group}") else: flash("An error occurred removing repo from group") - return redirect(url_for("user_group_view") + f"?group={group}") + + return redirect(url_for("user_group_view", group=group)) + + + @app.route('/account/application/deauthorize') @login_required diff --git a/augur/api/view/init.py b/augur/api/view/init.py index 869b383a62..1737131352 100644 --- a/augur/api/view/init.py +++ b/augur/api/view/init.py @@ -19,9 +19,6 @@ def init_settings(): settings["cache_expiry"] = 604800 settings["serving"] = "http://augur.chaoss.io/api/unstable" settings["pagination_offset"] = 25 - # Put reports.yml in the same directory as the config file - config_dir = configFile.parent - settings["reports"] = os.path.join(config_dir, "reports.yml") settings["session_key"] = secrets.token_hex() def write_settings(current_settings): @@ -33,63 +30,6 @@ def write_settings(current_settings): with open(configFile, 'w') as file: yaml.dump(current_settings, file) -# default reports definition -reports = { - "pull_request_reports":[ - { - "url":"average_commits_per_PR", - "description":"Average commits per pull request" - }, - { - "url":"average_comments_per_PR", - "description":"Average comments per pull request" - }, - { - "url":"PR_counts_by_merged_status", - "description":"Pull request counts by merged status" - }, - { - "url":"mean_response_times_for_PR", - "description":"Mean response times for pull requests" - }, - { - "url":"mean_days_between_PR_comments", - "description":"Mean days between pull request comments" - }, - { - "url":"PR_time_to_first_response", - "description":"Pull request time until first response" - }, - { - "url":"average_PR_events_for_closed_PRs", - "description":"Average pull request events for closed pull requests" - }, - { - "url":"Average_PR_duration", - "description":"Average pull request duration" - } - ], - "contributor_reports":[ - { - "url":"new_contributors_bar", - "description":"New contributors bar graph" - }, - { - "url":"returning_contributors_pie_chart", - "description":"Returning contributors pie chart" - } - ], - "contributor_reports_stacked":[ - { - "url":"new_contributors_stacked_bar", - "description":"New contributors stacked bar chart" - }, - { - "url":"returning_contributors_stacked_bar", - "description":"Returning contributors stacked bar chart" - } - ] -} # Initialize logging def init_logging(): diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py index 00d456733f..91d23531b4 100644 --- a/augur/api/view/routes.py +++ b/augur/api/view/routes.py @@ -221,13 +221,9 @@ def user_settings(): """ @app.route('/repos/views/repo/') def repo_repo_view(id): - # For some reason, there is no reports definition (shouldn't be possible) - if reports is None: - return render_message("Report Definitions Missing", "You requested a report for a repo on this instance, but a definition for the report layout was not found.") - repo = Repo.get_by_id(db_session, id) - return render_module("repo-info", reports=reports.keys(), images=reports, title="Repo", repo=repo, repo_id=id) + return render_module("repo-info", title="Repo", repo=repo, repo_id=id) """ ---------------------------------------------------------------- default: diff --git a/augur/api/view/utils.py b/augur/api/view/utils.py index aae5140cd7..dbfdd1b121 100644 --- 
a/augur/api/view/utils.py +++ b/augur/api/view/utils.py @@ -70,34 +70,6 @@ def getSetting(key, section = "View"): #version_check(settings) -""" ---------------------------------------------------------------- -""" -def loadReports(): - global reports - try: - with open(getSetting("reports")) as file: - reports = yaml.load(file, Loader=yaml.FullLoader) - id = -1 - for report in reports: - for image in reports[report]: - image['id'] = id = id + 1 - return True - except Exception as err: - logger.error(f"An exception occurred reading reports endpoints from [{getSetting('reports')}]:") - logger.error(err) - try: - with open(getSetting("reports"), 'w') as file: - logger.info("Attempting to generate default reports.yml") - yaml.dump(reports, file) - logger.info("Default reports file successfully generated.") - except Exception as ioErr: - logger.error("Error creating default report configuration:") - logger.error(ioErr) - return False - -if not loadReports(): - loadReports() - cache_files_requested = [] """ ---------------------------------------------------------------- @@ -160,58 +132,6 @@ def download(url, cmanager, filename, image_cache, image_id, repo_id = None): logger.error("An exception occurred writing a cache file to disk") logger.error(err) -""" ---------------------------------------------------------------- -""" -def requestReports(repo_id): - # If this request has already been fulfilled, no need to process it again - if(repo_id in report_requests.keys()): - return - - # initialize a new request entry to hold the resulting data - report_requests[repo_id] = {} - report_requests[repo_id]['complete'] = False - - host = getSetting("host", "Server") - port = getSetting("port", "Server") - - """ ---------- - If the report definition could not be loaded, we cannot determine what - files to request from the backend to compose the report. Returning here - causes the completion status of the request to be False, which will - display an error message when sent to the frontend. 
- """ - if reports is None: - return - - threadPools = [] - reportImages = {} - for report in reports: - # Reports is a dictionary of lists, so we get the size of each list - size = len(reports[report]) - - # Set up various threading components to manage image downloading - connection_mgr = urllib3.PoolManager(maxsize=size) - thread_pool = ThreadPoolExecutor(size) - threadPools.append(thread_pool) - - for image in reports[report]: - # Where should the downloaded image be stored (in cache) - filename = toCacheFilename(f"{image['url']}?repo_id={repo_id}") - # Where are we downloading the image from - image_url = f"{host}:{port}" + url_for(image['url'], repo_id = repo_id) - # f"{getSetting('serving')}/{image['url']}?repo_id={repo_id}" - - # Add a request for this image to the thread pool using the download function - thread_pool.submit(download, image_url, connection_mgr, filename, reportImages, image['id'], repo_id) - - # Wait for all connections to resolve, then clean up - for thread_pool in threadPools: - thread_pool.shutdown() - - report_requests[repo_id]['images'] = reportImages - - # Remove the request from the queue when completed - report_requests[repo_id]['complete'] = True """ ---------------------------------------------------------------- renderRepos: diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index 28ed289033..341df88862 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -87,7 +87,10 @@ def start(ctx, disable_collection, development, pidfile, port): if disable_collection: os.environ["AUGUR_DISABLE_COLLECTION"] = "1" - worker_vmem_cap = get_value("Celery", 'worker_process_vmem_cap') + core_worker_count = get_value("Celery", 'core_worker_count') + secondary_worker_count = get_value("Celery", 'secondary_worker_count') + facade_worker_count = get_value("Celery", 'facade_worker_count') + # create rabbit messages so if it failed on shutdown the queues are clean cleanup_collection_status_and_rabbit(logger, ctx.obj.engine) @@ -119,7 +122,7 @@ def start(ctx, disable_collection, development, pidfile, port): logger.info(f'Augur is running at: {"http" if development else "https"}://{host}:{port}') logger.info(f"The API is available at '{api_response.json()['route']}'") - processes = start_celery_worker_processes(float(worker_vmem_cap), disable_collection) + processes = start_celery_worker_processes((core_worker_count, secondary_worker_count, facade_worker_count), disable_collection) celery_beat_schedule_db = os.getenv("CELERYBEAT_SCHEDULE_DB", "celerybeat-schedule.db") if os.path.exists(celery_beat_schedule_db): @@ -201,25 +204,26 @@ def start(ctx, disable_collection, development, pidfile, port): os.unlink(pidfile) -def start_celery_worker_processes(vmem_cap_ratio, disable_collection=False): +def start_celery_worker_processes(worker_counts: tuple[int, int, int], disable_collection=False): + """ + Args: + worker_counts (tuple): a tuple of three integers describing how many workers to use for core, secondary, and facade tasks + disable_collection (bool, optional): whether to disable collection entirely and not schedule any actual task workers. Defaults to False. + + Returns: + list: a list of the worker processes as executed by subprocess.Popen + """ #Calculate process scaling based on how much memory is available on the system in bytes. 
#Each celery process takes ~500MB or 500 * 1024^2 bytes process_list = [] - #Cap memory usage to 30% of total virtual memory - available_memory_in_bytes = psutil.virtual_memory().total * vmem_cap_ratio - available_memory_in_megabytes = available_memory_in_bytes / (1024 ** 2) - max_process_estimate = available_memory_in_megabytes // 500 - sleep_time = 0 + core_worker_count, secondary_worker_count, facade_worker_count = worker_counts - #Get a subset of the maximum procesess available using a ratio, not exceeding a maximum value - def determine_worker_processes(ratio,maximum): - return max(min(round(max_process_estimate * ratio),maximum),1) + sleep_time = 0 frontend_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=1 -n frontend:{uuid.uuid4().hex}@%h -Q frontend" - max_process_estimate -= 1 process_list.append(subprocess.Popen(frontend_worker.split(" "))) sleep_time += 6 @@ -227,28 +231,20 @@ def determine_worker_processes(ratio,maximum): #2 processes are always reserved as a baseline. scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=2 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling" - max_process_estimate -= 2 process_list.append(subprocess.Popen(scheduling_worker.split(" "))) sleep_time += 6 - - #60% of estimate, Maximum value of 45 : Reduced because it can be lower - core_num_processes = determine_worker_processes(.40, 90) - logger.info(f"Starting core worker processes with concurrency={core_num_processes}") - core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h" + logger.info(f"Starting core worker processes with concurrency={core_worker_count}") + core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_worker_count} -n core:{uuid.uuid4().hex}@%h" process_list.append(subprocess.Popen(core_worker.split(" "))) sleep_time += 6 - #20% of estimate, Maximum value of 25 - secondary_num_processes = determine_worker_processes(.39, 50) - logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}") - secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" + logger.info(f"Starting secondary worker processes with concurrency={secondary_worker_count}") + secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_worker_count} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" process_list.append(subprocess.Popen(secondary_worker.split(" "))) sleep_time += 6 - #15% of estimate, Maximum value of 20 - facade_num_processes = determine_worker_processes(.17, 20) - logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}") - facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade" + logger.info(f"Starting facade worker processes with concurrency={facade_worker_count}") + facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_worker_count} -n facade:{uuid.uuid4().hex}@%h -Q facade" process_list.append(subprocess.Popen(facade_worker.split(" "))) sleep_time += 6 diff --git a/augur/application/cli/collection.py b/augur/application/cli/collection.py index f18ff03e29..810fecf74a 100644 --- 
a/augur/application/cli/collection.py +++ b/augur/application/cli/collection.py @@ -78,9 +78,11 @@ def start(ctx, development): os.environ["AUGUR_DEV"] = "1" logger.info("Starting in development mode") - worker_vmem_cap = get_value("Celery", 'worker_process_vmem_cap') + core_worker_count = get_value("Celery", 'core_worker_count') + secondary_worker_count = get_value("Celery", 'secondary_worker_count') + facade_worker_count = get_value("Celery", 'facade_worker_count') - process_list = start_celery_collection_processes(float(worker_vmem_cap)) + process_list = start_celery_collection_processes((core_worker_count, secondary_worker_count, facade_worker_count)) if os.path.exists("celerybeat-schedule.db"): logger.info("Deleting old task schedule") @@ -127,47 +129,38 @@ def start(ctx, development): except RedisConnectionError: pass -def start_celery_collection_processes(vmem_cap_ratio): +def start_celery_collection_processes(worker_counts: tuple[int, int, int]): + """ + Args: + worker_counts (tuple): a tuple of three integers describing how many workers to use for core, secondary, and facade tasks - #Calculate process scaling based on how much memory is available on the system in bytes. - #Each celery process takes ~500MB or 500 * 1024^2 bytes + Returns: + list: a list of the collection processes as executed by subprocess.Popen + """ process_list = [] - #Cap memory usage to 30% of total virtual memory - available_memory_in_bytes = psutil.virtual_memory().total * vmem_cap_ratio - available_memory_in_megabytes = available_memory_in_bytes / (1024 ** 2) - max_process_estimate = available_memory_in_megabytes // 500 sleep_time = 0 - #Get a subset of the maximum processes available using a ratio, not exceeding a maximum value - def determine_worker_processes(ratio,maximum): - return max(min(round(max_process_estimate * ratio),maximum),1) + core_worker_count, secondary_worker_count, facade_worker_count = worker_counts #2 processes are always reserved as a baseline. 
scheduling_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency=2 -n scheduling:{uuid.uuid4().hex}@%h -Q scheduling" - max_process_estimate -= 2 process_list.append(subprocess.Popen(scheduling_worker.split(" "))) sleep_time += 6 - #60% of estimate, Maximum value of 45: Reduced because not needed - core_num_processes = determine_worker_processes(.40, 90) - logger.info(f"Starting core worker processes with concurrency={core_num_processes}") - core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h" + logger.info(f"Starting core collection processes with concurrency={core_worker_count}") + core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_worker_count} -n core:{uuid.uuid4().hex}@%h" process_list.append(subprocess.Popen(core_worker.split(" "))) sleep_time += 6 - #20% of estimate, Maximum value of 25 - secondary_num_processes = determine_worker_processes(.39, 50) - logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}") - secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" + logger.info(f"Starting secondary collection processes with concurrency={secondary_worker_count}") + secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_worker_count} -n secondary:{uuid.uuid4().hex}@%h -Q secondary" process_list.append(subprocess.Popen(secondary_worker.split(" "))) sleep_time += 6 - #15% of estimate, Maximum value of 20 - facade_num_processes = determine_worker_processes(.17, 20) - logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}") - facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade" + logger.info(f"Starting facade collection processes with concurrency={facade_worker_count}") + facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_worker_count} -n facade:{uuid.uuid4().hex}@%h -Q facade" process_list.append(subprocess.Popen(facade_worker.split(" "))) sleep_time += 6 diff --git a/augur/application/cli/config.py b/augur/application/cli/config.py index 7156c0561b..6f22ea6c83 100644 --- a/augur/application/cli/config.py +++ b/augur/application/cli/config.py @@ -9,7 +9,7 @@ from augur.application.db.models import Config from augur.application.db.session import DatabaseSession -from augur.application.config import AugurConfig +from augur.application.config import AugurConfig, redact_setting_value from augur.application.cli import DatabaseContext, test_connection, test_db_connection, with_database from augur.util.inspect_without_import import get_phase_names_without_import ROOT_AUGUR_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) @@ -68,15 +68,15 @@ def init_config(ctx, github_api_key, facade_repo_directory, gitlab_api_key, redi config = AugurConfig(logger, session) - default_config = config.default_config + augmented_config = config.base_config phase_names = get_phase_names_without_import() #Add all phases as enabled by default for name in phase_names: - if name not in default_config['Task_Routine']: - default_config['Task_Routine'].update({name : 1}) + if name not in 
augmented_config['Task_Routine']: + augmented_config['Task_Routine'].update({name : 1}) #print(default_config) if redis_conn_string: @@ -91,18 +91,18 @@ def init_config(ctx, github_api_key, facade_repo_directory, gitlab_api_key, redi except ValueError: pass - default_config["Redis"]["connection_string"] = redis_conn_string + augmented_config["Redis"]["connection_string"] = redis_conn_string if rabbitmq_conn_string: - default_config["RabbitMQ"]["connection_string"] = rabbitmq_conn_string + augmented_config["RabbitMQ"]["connection_string"] = rabbitmq_conn_string - default_config["Keys"] = keys + augmented_config["Keys"] = keys - default_config["Facade"]["repo_directory"] = facade_repo_directory + augmented_config["Facade"]["repo_directory"] = facade_repo_directory - default_config["Logging"]["logs_directory"] = logs_directory or (ROOT_AUGUR_DIRECTORY + "/logs/") + augmented_config["Logging"]["logs_directory"] = logs_directory or (ROOT_AUGUR_DIRECTORY + "/logs/") - config.load_config_from_dict(default_config) + config.load_config_from_dict(augmented_config) @cli.command('load') @@ -162,35 +162,17 @@ def add_section(ctx, section_name, file): @click.option('--section', required=True) @click.option('--setting', required=True) @click.option('--value', required=True) -@click.option('--data-type') @test_connection @test_db_connection @with_database @click.pass_context -def config_set(ctx, section, setting, value, data_type): +def config_set(ctx, section, setting, value): with DatabaseSession(logger, engine=ctx.obj.engine) as session: config = AugurConfig(logger, session) - - if not data_type: - result = session.query(Config).filter(Config.section_name == section, Config.setting_name == setting).all() - if not result: - return click.echo("You must specify a data-type if the setting does not already exist") - data_type = result[0].type - - if data_type not in config.accepted_types: - print(f"Error invalid type for config. Please use one of these types: {config.accepted_types}") - return - - setting_dict = { - "section_name": section, - "setting_name": setting, - "value": value, - "type": data_type - } - config.add_or_update_settings([setting_dict]) - print(f"{setting} in {section} section set to {value}") + config.add_value(section, setting, value) + print(f"{setting} in {section} section set to {redact_setting_value(section, setting, value)}") @cli.command('get') @click.option('--section', required=True) diff --git a/augur/application/config.py b/augur/application/config.py index 7ace2befd3..d35e32717b 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -5,6 +5,8 @@ import os from augur.application.db.models import Config from augur.application.db.util import execute_session_query, convert_type_of_value +from pathlib import Path +import logging def get_development_flag_from_config(): @@ -26,7 +28,9 @@ def get_development_flag_from_config(): def get_development_flag(): return os.getenv("AUGUR_DEV") or get_development_flag_from_config() or False - +def redact_setting_value(section_name, setting_name, value): + value_redacted = value if section_name != "Keys" else "REDACTED" + return value_redacted default_config = { "Augur": { @@ -37,7 +41,6 @@ def get_development_flag(): "github": "", "gitlab": "" }, - #TODO: a lot of these are deprecated. 
"Facade": { "check_updates": 1, "create_xlsx_summary_files": 1, @@ -50,7 +53,8 @@ def get_development_flag(): "pull_repos": 1, "rebuild_caches": 1, "run_analysis": 1, - "run_facade_contributors": 1 + "run_facade_contributors": 1, + "facade_contributor_full_recollect": 0 }, "Server": { "cache_expire": "3600", @@ -67,7 +71,9 @@ def get_development_flag(): "log_level": "INFO", }, "Celery": { - "worker_process_vmem_cap": 0.25, + "core_worker_count": 5, + "secondary_worker_count": 5, + "facade_worker_count": 5, "refresh_materialized_views_interval_in_days": 1 }, "Redis": { @@ -78,7 +84,11 @@ def get_development_flag(): "connection_string": "amqp://augur:password123@localhost:5672/augur_vhost" }, "Tasks": { - "collection_interval": 30 + "collection_interval": 30, + "core_collection_interval_days": 15, + "secondary_collection_interval_days": 10, + "facade_collection_interval_days": 10, + "ml_collection_interval_days": 40 }, "Message_Insights": { "insight_days": 30, @@ -105,6 +115,9 @@ def get_development_flag(): "secondary_repo_collect_phase": 1, "facade_phase": 1, "machine_learning_phase": 0 + }, + "Frontend": { + "pagination_offset": 25 } } @@ -115,14 +128,65 @@ class AugurConfig(): session: DatabaseSession + @property + def base_config(self): + """Return the "base" config - either the default config or a default config with user modifications on top + This is used as a base upon which the Augur CLI injects values, such as API keys, connection strings, + and other values passed in via environment variables. + This config is then modified and passed into `load_config_from_dict`. + """ + read_only_sources = self._fetch_config_stores(lambda source: not source.writable) + config = {} + for config_source in read_only_sources: + config.update(config_source.retrieve_dict()) + + return config + def __init__(self, logger, session: DatabaseSession): self.session = session self.logger = logger self.accepted_types = ["str", "bool", "int", "float", "NoneType"] - self.default_config = default_config + # list items in order of precedence. lowest precedence (i.e. fallback) values first + self.config_sources = [ + JsonConfig(default_config, logger) + ] + + config_dir = Path(os.getenv("CONFIG_DATADIR", "./")) + config_path = config_dir.joinpath("augur.json") + if config_path.exists(): + self.config_sources.append(JsonConfig(json.loads(config_path.read_text(encoding="UTF-8")), logger)) + + self.config_sources.append( DatabaseConfig(session, logger) ) + + def _get_writable_source(self) -> 'ConfigStore': + """Returns the highest precedence source that can be written to. + Intended to be used for operations that require changing the config updates. + + Raises: + NotWriteableException: If no sources are available for writing, this exception is raised to tell the caller they must proceed in a read only manner + + Returns: + ConfigStore: An instance of ConfigStore representing the config storage location that can be written to. 
+ """ + writeable_sources = self._fetch_config_stores(lambda source: source.writable) + if len(writeable_sources) < 1: + raise NotWriteableException + + return writeable_sources[-1] + + def _fetch_config_stores(self, filter_func: None): + """Fetch the stack of config stores filtered by the provided function + + Args: + filter_func (func): a function or lambda accepting a ConfigSource as its only argument and returning a boolean indicating if it should be kept in or left out by the filter + """ + if filter_func is None: + return self.config_sources + return list(filter(filter_func, self.config_sources)) + def get_section(self, section_name) -> dict: """Get a section of data from the config. @@ -132,22 +196,11 @@ def get_section(self, section_name) -> dict: Returns: The section data as a dict """ - query = self.session.query(Config).filter_by(section_name=section_name).order_by(Config.setting_name.asc()) - section_data = execute_session_query(query, 'all') + if not self.is_section_in_config(section_name): + return {} - section_dict = {} - for setting in section_data: - setting_dict = setting.__dict__ - - setting_dict = convert_type_of_value(setting_dict, self.logger) - - setting_name = setting_dict["setting_name"] - setting_value = setting_dict["value"] - - section_dict[setting_name] = setting_value - - return section_dict - + config_dict = self.load_config() + return config_dict[section_name] def get_value(self, section_name: str, setting_name: str) -> Optional[Any]: """Get the value of a setting from the config. @@ -160,50 +213,27 @@ def get_value(self, section_name: str, setting_name: str) -> Optional[Any]: The value from config if found, and None otherwise """ - # TODO temporary until added to the DB schema - if section_name == "frontend" and setting_name == "pagination_offset": - return 25 - - try: - query = self.session.query(Config).filter(Config.section_name == section_name, Config.setting_name == setting_name) - config_setting = execute_session_query(query, 'one') - except s.orm.exc.NoResultFound: - return None - - setting_dict = config_setting.__dict__ - - setting_dict = convert_type_of_value(setting_dict, self.logger) - - return setting_dict["value"] + # TODO temporary until all uses of the lowercase version are gone + if section_name == "frontend": + section_name = "Frontend" + for source in reversed(self.config_sources): + val = source.get_value(section_name, setting_name) + if val is not None: + return val + return None def load_config(self) -> dict: """Get full config as a dictionary. 
Returns: - The config from the database + The config from all sources """ - # get all the sections in the config table - query = self.session.query(Config.section_name).order_by(Config.section_name.asc()) - section_names = execute_session_query(query, 'all') - config = {} - # loop through and get the data for each section - for section_name in section_names: - - section_data = self.get_section(section_name[0]) - - # rows with a section of None are on the top level, - # so we are adding these values to the top level rather - # than creating a section for them - if section_name[0] is None: - for key in list(section_data.keys()): - config[key] = section_data[key] - continue - - # add section data to config object - config[section_name[0]] = section_data + for config_source in self.config_sources: + config.update(config_source.retrieve_dict()) + return config @@ -213,8 +243,7 @@ def empty(self) -> bool: Returns: True if the config is empty, and False if it is not """ - query = self.session.query(Config) - return execute_session_query(query, 'first') is None + return all(map(lambda s: s.empty), self.config_sources) def is_section_in_config(self, section_name: str) -> bool: """Determine if a section is in the config. @@ -225,54 +254,21 @@ def is_section_in_config(self, section_name: str) -> bool: Returns: True if section is in the config, and False if it is not """ - query = self.session.query(Config).filter(Config.section_name == section_name) - return execute_session_query(query, 'first') is not None - - - def add_or_update_settings(self, settings: List[dict]): - """Add or update a list of settings. - + return any(map(lambda s: s.has_section(section_name), self.config_sources)) + + def add_value(self, section_name, setting_name, value): + """Adds or updates a config value. + Args: - list of settings with dicts containing section_name, setting_name, value, and optionally type - - Examples: - type is optional - setting = { - "section_name": section_name, - "setting_name": setting_name, - "value": value, - "type": data_type # optional - } + section_name: The name of the section being added + json_data: The data being added """ - for setting in settings: - - if "type" not in setting: - setting["type"] = setting["value"].__class__.__name__ - - if setting["type"] == "NoneType": - setting["type"] = None - - #print(f"\nsetting: {settings}") - #self.session.insert_data(settings,Config, ["section_name", "setting_name"]) - - #Check if setting exists. - query = self.session.query(Config).filter(and_(Config.section_name == setting["section_name"],Config.setting_name == setting["setting_name"]) ) - - if execute_session_query(query, 'first') is None: - # TODO: Update to use bulk insert dicts so config doesn't require database session - self.session.insert_data(setting,Config, ["section_name", "setting_name"]) - else: - #If setting exists. use raw update to not increase autoincrement - update_query = ( - update(Config) - .where(Config.section_name == setting["section_name"]) - .where(Config.setting_name == setting["setting_name"]) - .values(value=setting["value"]) - ) - - self.session.execute(update_query) - self.session.commit() - + try: + writeable_config = self._get_writable_source() + writeable_config.add_value(section_name, setting_name, value, ignore_existing=True) + except NotWriteableException: + return + def add_section_from_json(self, section_name: str, json_data: dict) -> None: """Add a section from a dict. 
@@ -281,26 +277,11 @@ def add_section_from_json(self, section_name: str, json_data: dict) -> None: section_name: The name of the section being added json_data: The data being added """ - data_keys = list(json_data.keys()) - - settings = [] - for key in data_keys: - - value = json_data[key] - - if isinstance(value, dict) is True: - # TODO: Uncomment out when insights worker config stuff is resolved - # self.logger.error(f"Values cannot be of type dict: {value}") - return - - setting = { - "section_name": section_name, - "setting_name": key, - "value": json_data[key], - } - settings.append(setting) - - self.add_or_update_settings(settings) + try: + writeable_config = self._get_writable_source() + writeable_config.create_section(section_name, json_data, ignore_existing=True) + except NotWriteableException: + return def load_config_file(self, file_path: str) -> dict: @@ -339,19 +320,450 @@ def load_config_from_dict(self, dict_data: dict) -> None: def clear(self) -> None: """Remove all values from the config.""" - self.session.query(Config).delete() - self.session.commit() + # note, with the hierarchical nature of the new config setup, this is a pretty useless method + # this is because the hierarhical store is designed to always be able to fall back on preconfigured defaults. + # Clearing will only reset any changes that the writable source provided to the config. + try: + writeable_config = self._get_writable_source() + writeable_config.clear() + except NotWriteableException: + return + + def remove_section(self, section_name: str) -> None: + """Remove a section from the config. + + Args: + section_name: The name of the section being deleted + """ + # note, with the hierarchical nature of the new config setup, this is a pretty useless method + # this is because the hierarhical store is designed to always be able to fall back on preconfigured defaults. + # Removing a section will only reset any changes that the writable source contributed in that section. + try: + writeable_config = self._get_writable_source() + writeable_config.remove_section(section_name) + except NotWriteableException: + return + +class NotWriteableException(Exception): + """Custom Augur exception class to be used when trying to modify a config that is not writeable + """ + pass + +class ConfigStore(): + """A class representing the interface for various possible config backends. + This should not contain implementations unless they apply to all possible config backends + """ + + def __init__(self, logger: logging.Logger): + self.logger = logger + + @property + def writable(self): + """Determine if this config store is writable. + + Returns: + True if the config store is writable, and False if it is not + """ + raise NotImplementedError() + + @property + def empty(self): + """Determine if this config store is empty. + + Returns: + True if the config store is empty, and False if it is not + """ + raise NotImplementedError() + + def load_dict(self, data: dict, ignore_existing=False): + """Load config into this store from dict values + + Args: + data (dict): the data to load + ignore_existing (bool, optional): whether to ignore any values or sections that exist already. Defaults to False. + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. + """ + raise NotImplementedError() + + def retrieve_dict(self): + """Get the full config from this store as a dictionary. 
+ + Returns: + dict: The dict representation of the config from this config store + """ + raise NotImplementedError() + + def clear(self): + """Remove all values from this config store. + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. + """ + raise NotImplementedError() def remove_section(self, section_name: str) -> None: """Remove a section from the config. Args: section_name: The name of the section being deleted + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. + """ + raise NotImplementedError() + + def has_section(self, section_name: str) -> bool: + """Determine if a section exists in this config. + + Args: + section_name: The name of the section to check for + + Returns: + True if the config store contains this section, and False if it is not + """ + raise NotImplementedError() + + def create_section(self, section_name: str, values: Optional[dict] = None, ignore_existing=False) -> None: + """Create a section in this config. + + Args: + section_name: The name of the section being deleted + values (Optional[dict], optional): Optional keys and values to populate in this section. Defaults to None. + ignore_existing (bool, optional): whether to ignore and overwrite an existing section or value with this name. Defaults to False. + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. """ + raise NotImplementedError() + + def get_section(self, section_name: str) -> dict: + """Return a section from this config store. + + Args: + section_name: The name of the section to check for + + Returns: + The section data as a dict + """ + raise NotImplementedError() + + def remove_value(self, section_name: str, value_key: str) -> None: + """Remove a value from the config. + + Args: + section_name: The name of the section the value is in + value_name: The key of the value being deleted + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. + """ + raise NotImplementedError() + + def has_value(self, section_name: str, value_key: str) -> bool: + """Determine if a section exists in this config. + + Args: + section_name: The name of the section the value is in + value_key: The key at which to look for a value + + Returns: + True if the config store contains this value, and False if not + """ + raise NotImplementedError() + + def add_value(self, section_name: str, value_key: str, value, ignore_existing=False) -> None: + """Create a section in this config. + + Args: + section_name: The name of the section being deleted + value_key (str): The key at which to store this value + value (any): the value to store at this key + ignore_existing (bool, optional): whether to ignore and overwrite an existing value if encountered. Defaults to False. + + Raises: + NotWriteableException: When attempting to modify a config that is not writeable. + """ + raise NotImplementedError() + + def get_value(self, section_name: str, value_key: str): + """Return a single value from this config store. 
+ + Args: + section_name: The name of the section to check for + value_key (str): The key at which to look for a value + + Returns: + The section data as a dict + """ + raise NotImplementedError() + + + + +class JsonConfig(ConfigStore): + """A ConfigStore for handling JSON data + """ + + def __init__(self, json_data, logger: logging.Logger): + super().__init__(logger) + self.json_data = json_data + + @property + def writable(self): + return False + + @property + def empty(self): + return self.json_data == {} + + def load_dict(self, data: dict, ignore_existing=False): + if not self.writable: + raise NotWriteableException() + + if ignore_existing: + self.json_data = data + else: + self.json_data.update(data) + + def retrieve_dict(self): + return self.json_data + + def clear(self): + if not self.writable: + raise NotWriteableException() + + self.json_data = {} + + def remove_section(self, section_name: str) -> None: + if not self.writable: + raise NotWriteableException() + + del self.json_data[section_name] + + + def has_section(self, section_name: str) -> bool: + return section_name in self.json_data + + def create_section(self, section_name: str, values: Optional[dict] = None, ignore_existing=False) -> None: + if not self.writable: + raise NotWriteableException() + + if values is None: + values = {} + + if ignore_existing: + self.json_data[section_name] = values + else: + self.json_data[section_name].update(values) + + def get_section(self, section_name: str) -> dict: + if self.has_section(section_name): + return self.json_data[section_name] + + def remove_value(self, section_name: str, value_key: str) -> None: + if not self.writable: + raise NotWriteableException() + + if self.has_section(section_name): + del self.json_data[section_name][value_key] + + + def has_value(self, section_name: str, value_key: str) -> bool: + return self.has_section(section_name) and self.json_data[section_name].get(value_key, None) is not None + + def add_value(self, section_name: str, value_key: str, value, ignore_existing=False) -> None: + if not self.writable: + raise NotWriteableException() + + if not self.has_section(section_name): + self.create_section(section_name, {[value_key]: value}, ignore_existing=ignore_existing) + return + + if ignore_existing: + self.json_data[section_name][value_key] = value + else: + self.json_data[section_name][value_key].update(value) + + + def get_value(self, section_name: str, value_key: str): + if not self.has_section(section_name): + return None + + return self.json_data[section_name].get(value_key, None) + + + +class DatabaseConfig(ConfigStore): + """A ConfigStore for handling JSON data + """ + from augur.application.db.session import DatabaseSession + + def __init__(self, session: DatabaseSession, logger: logging.Logger): + super().__init__(logger) + self.session = session + + @property + def writable(self): + return True + + @property + def empty(self): + query = self.session.query(Config) + return execute_session_query(query, 'first') is None + + @staticmethod + def _dict_to_config_table(json_data:dict): + """Convert an augur settings dict into a mapping from table columns to values for insertion in bulk + + Args: + json_data (dict): The settings to convert, in the same format as the default_dict at the top of this file + """ + + config_values = [] + for section_name, settings in json_data.items(): + for key, value in settings.items(): + + if isinstance(value, dict) is True: + # TODO: Uncomment out when insights worker config stuff is resolved + # 
self.logger.error(f"Values cannot be of type dict: {value}") + return + + setting = { + "section_name": section_name, + "setting_name": key, + "value": value, + } + + if "type" not in setting: + setting["type"] = setting["value"].__class__.__name__ + + if setting["type"] == "NoneType": + setting["type"] = None + + config_values.append(setting) + + return config_values + + + def load_dict(self, data: dict, ignore_existing=False): + if not self.writable: + raise NotWriteableException() + + for section, config_values in data.items(): + self.create_section(section, config_values, ignore_existing=ignore_existing) + + def retrieve_dict(self): + # get all the sections in the config table + query = self.session.query(Config.section_name).order_by(Config.section_name.asc()) + section_names = execute_session_query(query, 'all') + + config = {} + # loop through and get the data for each section + for section_name in section_names: + + section_data = self.get_section(section_name[0]) + + # rows with a section of None are on the top level, + # so we are adding these values to the top level rather + # than creating a section for them + if section_name[0] is None: + for key in list(section_data.keys()): + config[key] = section_data[key] + continue + + # add section data to config object + config[section_name[0]] = section_data + + return config + + def clear(self): + if not self.writable: + raise NotWriteableException() + + self.session.query(Config).delete() + self.session.commit() + + def remove_section(self, section_name: str) -> None: + if not self.writable: + raise NotWriteableException() + self.session.query(Config).filter(Config.section_name == section_name).delete() self.session.commit() - def create_default_config(self) -> None: - """Create default config in the database.""" - self.load_config_from_dict(self.default_config) + def has_section(self, section_name: str) -> bool: + query = self.session.query(Config).filter(Config.section_name == section_name) + return execute_session_query(query, 'first') is not None + + def create_section(self, section_name: str, values: Optional[dict] = None, ignore_existing=False) -> None: + if not self.writable: + raise NotWriteableException() + + if values is None: + values = {} + + for key, value in values.items(): + self.add_value(section_name, key, value, ignore_existing=ignore_existing) + + def get_section(self, section_name: str) -> dict: + query = self.session.query(Config).filter_by(section_name=section_name).order_by(Config.setting_name.asc()) + section_data = execute_session_query(query, 'all') + + section_dict = {} + for setting in section_data: + setting_dict = setting.__dict__ + + setting_dict = convert_type_of_value(setting_dict, self.logger) + + setting_name = setting_dict["setting_name"] + setting_value = setting_dict["value"] + + section_dict[setting_name] = setting_value + + return section_dict + + def remove_value(self, section_name: str, value_key: str) -> None: + raise NotImplementedError() + + def has_value(self, section_name: str, value_key: str) -> bool: + query = self.session.query(Config).filter(and_(Config.section_name == section_name,Config.setting_name == value_key) ) + return execute_session_query(query, 'first') is not None + + def add_value(self, section_name: str, value_key: str, value, ignore_existing=False) -> None: + + converted_settings = self._dict_to_config_table({section_name: { value_key: value}}) + + if len(converted_settings) >= 1: + setting = converted_settings[0] + + if not self.has_value(section_name, value_key): + 
self.session.insert_data(setting,Config, ["section_name", "setting_name"]) + else: + if not ignore_existing: + self.logger.error(f"Could not insert config value '{redact_setting_value(section_name, value_key, value)}' into section '{section_name}' for key '{value_key}' database because a value already exists there and caller did not specify override") + return + #If setting exists. use raw update to not increase autoincrement + update_query = ( + update(Config) + .where(Config.section_name == setting["section_name"]) + .where(Config.setting_name == setting["setting_name"]) + .values(value=setting["value"]) + ) + + self.session.execute(update_query) + self.session.commit() + + def get_value(self, section_name: str, value_key: str): + try: + query = self.session.query(Config).filter(Config.section_name == section_name, Config.setting_name == value_key) + config_setting = execute_session_query(query, 'one') + except s.orm.exc.NoResultFound: + return None + + setting_dict = config_setting.__dict__ + + setting_dict = convert_type_of_value(setting_dict, self.logger) + + return setting_dict["value"] diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index de0d9aaa81..eaa99fd394 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -457,7 +457,7 @@ def extract_needed_gitlab_issue_label_data(labels: List[dict], repo_id: int, too -def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: """ Retrieve only the needed data for pr labels from the api response @@ -487,7 +487,7 @@ def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: return message_ref_dict # retrieve only the needed data for pr labels from the api response -def extract_needed_pr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_pr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: message_ref_dict = { 'pull_request_id': pull_request_id, @@ -1128,7 +1128,7 @@ def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, t return all_meta -def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: """ Extract the message id for a given message on an issue from an api response and connect it to the relevant repo id. 
@@ -1190,7 +1190,7 @@ def extract_needed_gitlab_message_data(comment: dict, platform_id: int, repo_id: return comment_dict -def extract_needed_gitlab_mr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_gitlab_mr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: """ Retrieve only the needed data for pr labels from the api response diff --git a/augur/application/db/engine.py b/augur/application/db/engine.py index 2870909093..0ea2bc1730 100644 --- a/augur/application/db/engine.py +++ b/augur/application/db/engine.py @@ -10,7 +10,7 @@ from augur.application.db.util import catch_operational_error -def parse_database_string(db_string: str) -> str: +def parse_database_string(db_string: str) -> tuple[str,str, str, str, str]: """Parse database string into the following components: username, password, host, port, database """ diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index a82c97dd66..4f106b0a7d 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -19,34 +19,6 @@ logger = logging.getLogger("db_lib") -def get_section(section_name) -> dict: - """Get a section of data from the config. - - Args: - section_name: The name of the section being retrieved - - Returns: - The section data as a dict - """ - with get_session() as session: - - query = session.query(Config).filter_by(section_name=section_name) - section_data = execute_session_query(query, 'all') - - section_dict = {} - for setting in section_data: - setting_dict = setting.__dict__ - - setting_dict = convert_type_of_value(setting_dict, logger) - - setting_name = setting_dict["setting_name"] - setting_value = setting_dict["value"] - - section_dict[setting_name] = setting_value - - return section_dict - - def get_value(section_name: str, setting_name: str) -> Optional[Any]: """Get the value of a setting from the config. 
@@ -217,8 +189,6 @@ def facade_bulk_insert_commits(logger, records): session.rollback() if len(records) > 1: - logger.error(f"Ran into issue when trying to insert commits \n Error: {e}") - #split list into halves and retry insert until we isolate offending record firsthalfRecords = records[:len(records)//2] secondhalfRecords = records[len(records)//2:] @@ -273,31 +243,54 @@ def facade_bulk_insert_commits(logger, records): session.commit() else: raise e - -def batch_insert_contributors(logger, data: Union[List[dict], dict]) -> Optional[List[dict]]: - - batch_size = 1000 +def batch_insert_contributors(logger, data: Union[List[dict], dict], batch_size = 1000) -> Optional[List[dict]]: for i in range(0, len(data), batch_size): batch = data[i:i + batch_size] bulk_insert_dicts(logger, batch, Contributor, ['cntrb_id']) + + return None -def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: +def bulk_insert_dicts(logger, data_input: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: + """ Provides bulk-insert/update (upsert) capabilities for adding bulk data (as a column:value dict mapping) into a specific table + + Args: + logger (Logger): the logger to use + data_input (Union[List[dict], dict]): the dicts to upsert (must match the column names as defined in the schema for the table) + table (Base): the SQLAlchemy model class for the table to upsert the data into + natural_keys (List[str]): the columns that define the natural unique keys for the data + return_columns (Optional[List[str]], optional): list of the column names to return. Defaults to None. + string_fields (Optional[List[str]], optional): list of keys in the incoming dicts that should be cleaned to handle bad characters Postgres doesn't like. Defaults to None. + on_conflict_update (bool, optional): whether to update on conflict. Defaults to True. + + Raises: + Exception: database errors that persist after the retry loop are re-raised. + + Returns: + Optional[List[dict]]: the original data with each item filtered to only contain the columns specified by `return_columns`, if present. + """ - if isinstance(data, list) is False: # if a dict is passed to data then # convert it to a list with one value - if isinstance(data, dict) is True: - data = [data] + if isinstance(data_input, dict) is True: + data = [data_input] else: logger.error("Data must be a list or a dict") return None + else: + data = list(data_input) if len(data) == 0: # self.logger.info("Gave no data to insert, returning...") @@ -333,8 +326,10 @@ def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys # create a dict that the on_conflict_do_update method requires to be able to map updates whenever there is a conflict.
See sqlalchemy docs for more explanation and examples: https://docs.sqlalchemy.org/en/14/dialects/postgresql.html#updating-using-the-excluded-insert-values setDict = {} + base_table = getattr(table, "__table__", table) for key in data[0].keys(): - setDict[key] = getattr(stmnt.excluded, key) + existing_col = getattr(base_table.c, key) + setDict[key] = func.coalesce(getattr(stmnt.excluded, key), existing_col) stmnt = stmnt.on_conflict_do_update( #This might need to change @@ -399,8 +394,9 @@ def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys if deadlock_detected is True: logger.error("Made it through even though Deadlock was detected") - - return "success" + + # success + return None # othewise it gets the requested return columns and returns them as a list of dicts @@ -604,4 +600,4 @@ def get_repo_group_by_name(name): with get_session() as session: return session.query(RepoGroup).filter(RepoGroup.rg_name == name).first() - \ No newline at end of file + diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 9212bcc5e9..c80077d9b6 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -939,7 +939,7 @@ def is_valid_github_repo(gh_session, url: str) -> bool: ) wait_until_time = localtime(wait_until) logger.error(f"rate limited fetching {url}") - logger.error(f"sleeping until {wait_until_time.tm_hour}:{wait_until_time.tm_min} ({wait_in_seconds} seconds)") + logger.error(f"sleeping until {wait_until_time.tm_hour:02d}:{wait_until_time.tm_min:02d} ({wait_in_seconds} seconds)") sleep(wait_in_seconds) attempts+=1 continue @@ -2885,7 +2885,7 @@ class PullRequestAssignee(Base): @classmethod def from_github(cls, assignee, repo_id, tool_source, tool_version, data_source): - pr_assignee_ojb = cls() + pr_assignee_obj = cls() # store the pr_url data on in the pr assignee data for now so we can relate it back to a pr later pr_assignee_obj.contrib_id = assignee["cntrb_id"] @@ -3600,4 +3600,4 @@ class RepoClone(Base): count_clones = Column(BigInteger) clone_data_timestamp = Column(TIMESTAMP(precision=6)) - repo = relationship("Repo") \ No newline at end of file + repo = relationship("Repo") diff --git a/augur/application/db/session.py b/augur/application/db/session.py index a26fc172b7..920b6fe6b0 100644 --- a/augur/application/db/session.py +++ b/augur/application/db/session.py @@ -3,6 +3,7 @@ from sqlalchemy.orm import Session from sqlalchemy.dialects import postgresql from sqlalchemy.exc import OperationalError +from sqlalchemy import func from typing import Optional, List, Union from psycopg2.errors import DeadlockDetected @@ -93,18 +94,20 @@ def fetchall_data_from_sql_text(self,sql_text): result = connection.execute(sql_text) return [dict(row) for row in result.mappings()] - def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: + def insert_data(self, data_input: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: - if isinstance(data, list) is False: + if isinstance(data_input, list) is False: # if a dict is passed to data then # convert it to a list with one value - if isinstance(data, dict) is True: - data = [data] + if isinstance(data_input, dict) is True: 
+ data = [data_input] else: self.logger.info("Data must be a list or a dict") return None + else: + data = list(data_input) if len(data) == 0: # self.logger.info("Gave no data to insert, returning...") @@ -140,8 +143,10 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s # create a dict that the on_conflict_do_update method requires to be able to map updates whenever there is a conflict. See sqlalchemy docs for more explanation and examples: https://docs.sqlalchemy.org/en/14/dialects/postgresql.html#updating-using-the-excluded-insert-values setDict = {} + base_table = getattr(table, "__table__", table) for key in data[0].keys(): - setDict[key] = getattr(stmnt.excluded, key) + existing_col = getattr(base_table.c, key) + setDict[key] = func.coalesce(getattr(stmnt.excluded, key), existing_col) stmnt = stmnt.on_conflict_do_update( #This might need to change @@ -166,7 +171,7 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s # if there is no data to return then it executes the insert then returns nothing if not return_columns: - + # TODO: duplicate-looking code alert while attempts < 10: try: #begin keyword is needed for sqlalchemy 2.x @@ -205,8 +210,9 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s if deadlock_detected is True: self.logger.error("Made it through even though Deadlock was detected") - - return "success" + + # success + return None # othewise it gets the requested return columns and returns them as a list of dicts diff --git a/augur/application/schema/alembic/versions/34_add_contrib_to_config.py b/augur/application/schema/alembic/versions/34_add_contrib_to_config.py new file mode 100644 index 0000000000..1a87be365e --- /dev/null +++ b/augur/application/schema/alembic/versions/34_add_contrib_to_config.py @@ -0,0 +1,57 @@ +"""Add Facade contributor full recollect to config, default to off (0) + +Revision ID: 34 +Revises: 33 +Create Date: 2025-10-09 12:03:57.171011 + +""" +from alembic import op +from augur.application.db.session import DatabaseSession +from augur.application.config import * +from sqlalchemy.sql import text +import logging + +# revision identifiers, used by Alembic. +revision = '34' +down_revision = '33' +branch_labels = None +depends_on = None + +logger = logging.getLogger(__name__) + +def upgrade(): + + with DatabaseSession(logger) as session: + config = AugurConfig(logger,session) + config_dict = config.load_config() + + #Update the missing fields of the facade section in the config + section = config_dict.get("Facade") + + #Just copy the default if section doesn't exist. 
+ if section: + if 'facade_contributor_full_recollect' not in section.keys(): + section['facade_contributor_full_recollect'] = 0 + + else: + section = config.default_config["Facade"] + + config.add_section_from_json("Facade", section) + + +def downgrade(): + + conn = op.get_bind() + + conn.execute(text(f""" + DELETE FROM augur_operations.config + WHERE section_name='Facade' AND (setting_name='facade_contributor_full_recollect'); + """)) + + try: + conn.execute(text(f""" + DELETE FROM augur_operations.config + WHERE section_name='Facade' AND (setting_name='facade_contributor_full_recollect'); + """)) + except: + pass \ No newline at end of file diff --git a/augur/application/schema/repo_load_sample.csv b/augur/application/schema/repo_load_sample.csv index b04519f30f..fb537d4949 100644 --- a/augur/application/schema/repo_load_sample.csv +++ b/augur/application/schema/repo_load_sample.csv @@ -1,8 +1,8 @@ -10,https://github.com/chaoss/augur.git -10,https://github.com/chaoss/grimoirelab.git -20,https://github.com/chaoss/wg-evolution.git -20,https://github.com/chaoss/wg-risk.git -20,https://github.com/chaoss/wg-common.git -20,https://github.com/chaoss/wg-value.git -20,https://github.com/chaoss/wg-diversity-inclusion.git -20,https://github.com/chaoss/wg-app-ecosystem.git +https://github.com/chaoss/augur.git,10 +https://github.com/chaoss/grimoirelab.git,10 +https://github.com/chaoss/wg-evolution.git,20 +https://github.com/chaoss/wg-risk.git,20 +https://github.com/chaoss/wg-common.git,20 +https://github.com/chaoss/wg-value.git,20 +https://github.com/chaoss/wg-diversity-inclusion.git,20 +https://github.com/chaoss/wg-app-ecosystem.git,20 diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index 15537a4d1a..5baaed20d4 100644 --- a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -112,8 +112,6 @@ def trim_commits_post_analysis_facade_task(repo_git): repo = repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id - start_date = facade_helper.get_setting('start_date') - logger.info(f"Generating sequence for repo {repo_id}") repo = get_repo_by_repo_git(repo_git) @@ -123,7 +121,7 @@ def trim_commits_post_analysis_facade_task(repo_git): repo_loc = (f"{absolute_path}/.git") # Grab the parents of HEAD - parent_commits = get_parent_commits_set(repo_loc, start_date) + parent_commits = get_parent_commits_set(repo_loc) # Grab the existing commits from the database existing_commits = get_existing_commits_set(repo_id) @@ -237,8 +235,6 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id - start_date = facade_helper.get_setting('start_date') - logger.info(f"Generating sequence for repo {repo_id}") repo = get_repo_by_repo_id(repo_id) @@ -248,7 +244,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: repo_loc = (f"{absolute_path}/.git") # Grab the parents of HEAD - parent_commits = get_parent_commits_set(repo_loc, start_date) + parent_commits = get_parent_commits_set(repo_loc) # Grab the existing commits from the database existing_commits = get_existing_commits_set(repo_id) @@ -438,11 +434,6 @@ def generate_analysis_sequence(logger,repo_git, facade_helper): analysis_sequence = [] - #repo_list = s.sql.text("""SELECT repo_id,repo_group_id,repo_path,repo_name FROM repo WHERE repo_git=:value""").bindparams(value=repo_git) - #repos = fetchall_data_from_sql_text(repo_list) - - start_date = facade_helper.get_setting('start_date') - #repo_ids = [repo['repo_id'] for 
repo in repos] #repo_id = repo_ids.pop(0) @@ -473,8 +464,6 @@ def facade_phase(repo_git, full_collection): #repo_list = s.sql.text("""SELECT repo_id,repo_group_id,repo_path,repo_name FROM repo WHERE repo_git=:value""").bindparams(value=repo_git) #repos = fetchall_data_from_sql_text(repo_list) - start_date = facade_helper.get_setting('start_date') - #repo_ids = [repo['repo_id'] for repo in repos] #repo_id = repo_ids.pop(0) diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index c62034a94e..21fe424d10 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -35,8 +35,8 @@ from psycopg2.errors import DeadlockDetected from augur.application.db.session import DatabaseSession +from augur.application.config import AugurConfig from augur.application.db.lib import execute_sql -from augur.application.db.lib import get_section from logging import Logger logger = logging.getLogger(__name__) @@ -110,7 +110,10 @@ def __init__(self,logger: Logger): self.logger = logger - worker_options = get_section("Facade") + with DatabaseSession(logger, engine) as session: + config = AugurConfig(logger, session) + + worker_options = config.get_section("Facade") self.limited_run = worker_options["limited_run"] self.delete_marked_repos = worker_options["delete_marked_repos"] @@ -127,6 +130,7 @@ def __init__(self,logger: Logger): self.rebuild_caches = worker_options["rebuild_caches"] self.multithreaded = worker_options["multithreaded"] self.create_xlsx_summary_files = worker_options["create_xlsx_summary_files"] + self.facade_contributor_full_recollect = worker_options["facade_contributor_full_recollect"] self.tool_source = "Facade" self.data_source = "Git Log" @@ -166,7 +170,13 @@ def log_activity(self, level, status): # Log an activity based upon urgency and user's preference. If the log level is # "Debug", then just print it and don't save it in the database. log_options = ('Error','Quiet','Info','Verbose','Debug') - self.logger.info(f"* {status}\n") + logmsg = f"* {status}\n" + if level == "Error": + self.logger.error(logmsg) + elif level == "Debug" or level == "Verbose": + self.logger.debug(logmsg) + else: + self.logger.info(logmsg) #Return if only debug if level == 'Debug': @@ -244,245 +254,3 @@ def insert_or_update_data(self, query, **bind_args)-> None: return def inc_repos_processed(self): self.repos_processed += 1 - -""" -class FacadeConfig: - \"""Legacy facade config that holds facade's database functionality - - This is mainly for compatibility with older functions from legacy facade. - - Initializes database when it encounters a database exception - - Attributes: - repos_processed (int): Counter for how many repos have been analyzed - cursor (psycopg2.extensions.cursor): database cursor for legacy facade. - logger (Logger): logger object inherited from the session object - db (psycopg2.extensions.connection): database connection object for legacy facade. - tool_source (str): String marking the source of data as from facade. - data_source (str): String indicating that facade gets data from git - tool_version (str): Facade version - worker_options (dict): Config options for facade. - log_level (str): Keyword indicating level of logging for legacy facade. 
- \""" - def __init__(self, logger: Logger): - self.repos_processed = 0 - self.cursor = None - self.logger = logger - - self.db = None - - #init db first thing - db_credentials = get_database_args_from_env() - - # Set up the database - db_user = db_credentials["db_user"] - db_pass = db_credentials["db_pass"] - db_name = db_credentials["db_name"] - db_host = db_credentials["db_host"] - db_port = db_credentials["db_port"] - db_user_people = db_user - db_pass_people = db_pass - db_name_people = db_name - db_host_people = db_host - db_port_people = db_port - # Open a general-purpose connection - db,cursor = self.database_connection( - db_host, - db_user, - db_pass, - db_name, - db_port, False, False) - - #worker_options = read_config("Workers", "facade_worker", None, None) - - with DatabaseSession(logger, engine) as session: - config = AugurConfig(logger, session) - worker_options = config.get_section("Facade") - - if 'repo_directory' in worker_options: - self.repo_base_directory = worker_options['repo_directory'] - else: - self.log_activity('Error',"Please specify a \'repo_directory\' parameter" - " in your \'Workers\' -> \'facade_worker\' object in your config " - "to the directory in which you want to clone repos. Exiting...") - sys.exit(1) - - self.tool_source = '\'Facade \'' - self.tool_version = '\'1.3.0\'' - self.data_source = '\'Git Log\'' - - self.worker_options = worker_options - - # Figure out how much we're going to log - #logging.basicConfig(filename='worker_{}.log'.format(worker_options['port']), filemode='w', level=logging.INFO) - self.log_level = None #self.get_setting('log_level') - - - #### Database update functions #### - - def increment_db(self, version): - - # Helper function to increment the database number - - increment_db = ("INSERT INTO settings (setting,value) " - "VALUES ('database_version',%s)") - self.cursor.execute(increment_db, (version, )) - db.commit() - - print("Database updated to version: %s" % version) - - def update_db(self, version): - - # This function should incrementally step any version of the database up to - # the current schema. After executing the database operations, call - # increment_db to bring it up to the version with which it is now compliant. - - print("Legacy Facade Block for DB UPDATE. No longer used. ") - - print("No further database updates.\n") - - def migrate_database_config(self): - - # Since we're changing the way we store database credentials, we need a way to - # transparently migrate anybody who was using the old file. Someday after a long - # while this can disappear. - - try: - # If the old database config was found, write a new config - imp.find_module('db') - - db_config = configparser.ConfigParser() - - from db import db_user,db_pass,db_name,db_host - from db import db_user_people,db_pass_people,db_name_people,db_host_people - - db_config.add_section('main_database') - db_config.set('main_database','user',db_user) - db_config.set('main_database','pass',db_pass) - db_config.set('main_database','name',db_name) - db_config.set('main_database','host',db_host) - - db_config.add_section('people_database') - db_config.set('people_database','user',db_user_people) - db_config.set('people_database','pass',db_pass_people) - db_config.set('people_database','name',db_name_people) - db_config.set('people_database','host',db_host_people) - - with open('db.cfg','w') as db_file: - db_config.write(db_file) - - print("Migrated old style config file to new.") - except: - # If nothing is found, the user probably hasn't run setup yet. 
- sys.exit("Can't find database config. Have you run setup.py?") - - try: - os.remove('db.py') - os.remove('db.pyc') - print("Removed unneeded config files") - except: - print("Attempted to remove unneeded config files") - - return db_user,db_pass,db_name,db_host,db_user_people,db_pass_people,db_name_people,db_host_people - - #### Global helper functions #### - - def database_connection(self, db_host,db_user,db_pass,db_name, db_port, people, multi_threaded_connection): - - # Return a database connection based upon which interpreter we're using. CPython - # can use any database connection, although MySQLdb is preferred over pymysql - # for performance reasons. However, PyPy can't use MySQLdb at this point, - # instead requiring a pure python MySQL client. This function returns a database - # connection that should provide maximum performance depending upon the - # interpreter in use. - - ##TODO: Postgres connections as we make them ARE threadsafe. We *could* refactor this accordingly: https://www.psycopg.org/docs/connection.html #noturgent - - - # if platform.python_implementation() == 'PyPy': - db_schema = 'augur_data' - db = psycopg2.connect( - host = db_host, - user = db_user, - password = db_pass, - database = db_name, - # charset = 'utf8mb4', - port = db_port, - options=f'-c search_path={db_schema}', - connect_timeout = 31536000,) - - cursor = db.cursor()#pymysql.cursors.DictCursor) - - - self.cursor = cursor - self.db = db - - # Figure out how much we're going to log - #self.log_level = self.get_setting('log_level') - #Not getting debug logging for some reason. - self.log_level = 'Debug' - return db, cursor - - def get_setting(self, setting): - - # Get a setting from the database - - query = (\"""SELECT value FROM settings WHERE setting=%s ORDER BY - last_modified DESC LIMIT 1\""") - self.cursor.execute(query, (setting, )) - # print(type(self.cursor.fetchone())) - return self.cursor.fetchone()[0]#["value"] - - def update_status(self, status): - - # Update the status displayed in the UI - - query = ("UPDATE settings SET value=%s WHERE setting='utility_status'") - self.cursor.execute(query, (status, )) - self.db.commit() - - def log_activity(self, level, status): - - # Log an activity based upon urgency and user's preference. If the log level is - # "Debug", then just print it and don't save it in the database. 
- - log_options = ('Error','Quiet','Info','Verbose','Debug') - self.logger.info("* %s\n" % status) - if self.log_level == 'Debug' and level == 'Debug': - return - - #if log_options.index(level) <= log_options.index(self.log_level): - query = ("INSERT INTO utility_log (level,status) VALUES (%s,%s)") - try: - self.cursor.execute(query, (level, status)) - self.db.commit() - except Exception as e: - self.logger.info('Error encountered: {}\n'.format(e)) - - db_credentials = get_database_args_from_env() - - # Set up the database - db_user = db_credentials["db_user"] - db_pass = db_credentials["db_pass"] - db_name = db_credentials["db_name"] - db_host = db_credentials["db_host"] - db_port = db_credentials["db_port"] - db_user_people = db_user - db_pass_people = db_pass - db_name_people = db_name - db_host_people = db_host - db_port_people = db_port - # Open a general-purpose connection - db,cursor = self.database_connection( - db_host, - db_user, - db_pass, - db_name, - db_port, False, False) - self.cursor.execute(query, (level, status)) - self.db.commit() - - - - -""" diff --git a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py index 874f338902..f754f4e098 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -435,7 +435,7 @@ def git_repo_updates(facade_helper, repo_git): cmdpull2 = (f"git -C {absolute_path} pull") - cmd_reset = (f"git -C {absolute_path} reset --hard origin") + cmd_reset = (f"git -C {absolute_path} reset --hard origin/{remotedefault}") cmd_reset_wait = subprocess.Popen( [cmd_reset], shell=True).wait() diff --git a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py index caae6c02ba..c06614ac7d 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py @@ -105,10 +105,10 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name): return f"{repo_base_dir}{repo_id}-{repo_path}/{repo_name}" -def get_parent_commits_set(absolute_repo_path, start_date): +def get_parent_commits_set(absolute_repo_path): parents = subprocess.Popen(["git --git-dir %s log --ignore-missing " - "--pretty=format:'%%H' --since=%s" % (absolute_repo_path,start_date)], + "--pretty=format:'%%H'" % (absolute_repo_path)], stdout=subprocess.PIPE, shell=True) parent_commits = set(parents.stdout.read().decode("utf-8",errors="ignore").split(os.linesep)) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 1b11f98223..eff64df6ee 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -8,6 +8,7 @@ from augur.application.db.models import Contributor from augur.tasks.github.facade_github.core import * from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors +from augur.application.db.lib import get_session, execute_session_query from augur.tasks.git.util.facade_worker.facade_worker.facade00mainprogram import * @@ -198,6 +199,12 @@ def insert_facade_contributors(self, repo_git): logger = logging.getLogger(insert_facade_contributors.__name__) repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id + facade_helper = 
FacadeHelper(logger) + + with get_session() as session: + query = session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo.repo_id) + collection_status = execute_session_query(query,'one') + last_collected_date = collection_status.facade_data_last_collected if not facade_helper.facade_contributor_full_recollect else None # Get all of the commit data's emails and names from the commit table that do not appear # in the contributors table or the contributors_aliases table. @@ -214,6 +221,7 @@ def insert_facade_contributors(self, repo_git): commits WHERE commits.repo_id = :repo_id + AND (:since_date is NULL OR commits.data_collection_date > :since_date) AND (NOT EXISTS ( SELECT contributors.cntrb_canonical FROM contributors WHERE contributors.cntrb_canonical = commits.cmt_author_raw_email ) or NOT EXISTS ( SELECT contributors_aliases.alias_email from contributors_aliases where contributors_aliases.alias_email = commits.cmt_author_raw_email) AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name )) @@ -231,6 +239,7 @@ def insert_facade_contributors(self, repo_git): commits WHERE commits.repo_id = :repo_id + AND (:since_date is NULL OR commits.data_collection_date > :since_date) AND EXISTS ( SELECT unresolved_commit_emails.email FROM unresolved_commit_emails WHERE unresolved_commit_emails.email = commits.cmt_author_raw_email ) AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name ) GROUP BY @@ -239,7 +248,7 @@ def insert_facade_contributors(self, repo_git): commits.cmt_author_raw_email ORDER BY hash - """).bindparams(repo_id=repo_id) + """).bindparams(repo_id=repo_id,since_date=last_collected_date) #Execute statement with session. result = execute_sql(new_contrib_sql) @@ -257,7 +266,6 @@ def insert_facade_contributors(self, repo_git): logger.debug("DEBUG: Got through the new_contribs") - facade_helper = FacadeHelper(logger) # sql query used to find corresponding cntrb_id's of emails found in the contributor's table # i.e., if a contributor already exists, we use it! 
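# Note on the :since_date pattern used in the queries above and below (illustrative, not
# project code): when the new facade_contributor_full_recollect flag is off, since_date is
# bound to the repo's facade_data_last_collected timestamp, so only commits collected since
# the last facade run are scanned; when the flag is on, since_date is bound to NULL and the
# predicate "(:since_date is NULL OR commits.data_collection_date > :since_date)" matches
# every commit, i.e. a full recollection. The same check in plain Python would be roughly:
#     keep = (since_date is None) or (commit_collection_date > since_date)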
resolve_email_to_cntrb_id_sql = s.sql.text(""" @@ -271,6 +279,7 @@ def insert_facade_contributors(self, repo_git): commits WHERE contributors.cntrb_canonical = commits.cmt_author_raw_email + AND (:since_date is NULL OR commits.data_collection_date > :since_date) AND commits.repo_id = :repo_id UNION SELECT DISTINCT @@ -286,7 +295,8 @@ def insert_facade_contributors(self, repo_git): contributors_aliases.alias_email = commits.cmt_author_raw_email AND contributors.cntrb_id = contributors_aliases.cntrb_id AND commits.repo_id = :repo_id - """).bindparams(repo_id=repo_id) + AND (:since_date is NULL OR commits.data_collection_date > :since_date) + """).bindparams(repo_id=repo_id,since_date=last_collected_date) result = execute_sql(resolve_email_to_cntrb_id_sql) diff --git a/augur/tasks/github/releases/core.py b/augur/tasks/github/releases/core.py index 239b83dce9..255b34cf89 100644 --- a/augur/tasks/github/releases/core.py +++ b/augur/tasks/github/releases/core.py @@ -23,7 +23,7 @@ def get_release_inf(repo_id, release, tag_only): release_inf = { - 'release_id': release['id'], + 'release_id': str(release['id']).strip(), 'repo_id': repo_id, 'release_name': release['name'], 'release_description': release['description'] if release['description'] is not None else '', @@ -51,7 +51,7 @@ def get_release_inf(repo_id, release, tag_only): author = "nobody" date = "" release_inf = { - 'release_id': release['id'], + 'release_id': str(release['id']).strip(), 'repo_id': repo_id, 'release_name': release['name'], 'release_description': 'tag_only', @@ -67,17 +67,23 @@ def get_release_inf(repo_id, release, tag_only): def insert_release(session, logger, repo_id, owner, release, tag_only = False): - # Get current table values + # Get current table values with proper trimming logger.info('Getting release table values\n') query = session.query(Release.release_id).filter(Release.repo_id == repo_id) - release_id_data = execute_session_query(query, 'all')#pd.read_sql(release_id_data_sql, self.db, params={'repo_id': repo_id}) - release_id_data = [str(r_id).strip() for r_id in release_id_data]#release_id_data.apply(lambda x: x.str.strip()) + release_id_data = execute_session_query(query, 'all') + existing_release_ids = {str(r_id).strip() for r_id in release_id_data} # Put all data together in format of the table logger.info(f'Inserting release for repo with id:{repo_id}, owner:{owner}, release name:{release["name"]}\n') release_inf = get_release_inf(repo_id, release, tag_only) + + # Check if release already exists (with proper trimming) + new_release_id = str(release_inf['release_id']).strip() + if new_release_id in existing_release_ids: + logger.info(f"Release {new_release_id} already exists for repo {repo_id}, skipping insertion\n") + return - #Do an upsert + #Do an upsert with string field cleaning string_fields = ["release_name", "release_description", "release_author", "release_tag_name"] bulk_insert_dicts(logger, release_inf,Release,['release_id'], string_fields=string_fields) diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index db8d2239d4..d1209fadd0 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -241,8 +241,11 @@ def setup_periodic_tasks(sender, **kwargs): sender.add_periodic_task(thirty_days_in_seconds, non_repo_domain_tasks.s()) mat_views_interval = int(config.get_value('Celery', 'refresh_materialized_views_interval_in_days')) - logger.info(f"Scheduling refresh materialized view every night at 1am CDT") - 
sender.add_periodic_task(datetime.timedelta(days=mat_views_interval), refresh_materialized_views.s()) + if mat_views_interval > 0: + logger.info(f"Scheduling refresh materialized view every night at 1am CDT") + sender.add_periodic_task(datetime.timedelta(days=mat_views_interval), refresh_materialized_views.s()) + else: + logger.info(f"Refresh materialized view task is disabled.") # logger.info(f"Scheduling update of collection weights on midnight each day") # sender.add_periodic_task(crontab(hour=0, minute=0),augur_collection_update_weights.s()) diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 3a61e391a8..91e05c6fc1 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -29,6 +29,7 @@ from augur.tasks.util.collection_util import * from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_facade_weight_time_factor from augur.application.db.lib import execute_sql, get_session +from augur.application.config import AugurConfig RUNNING_DOCKER = os.environ.get('AUGUR_DOCKER_DEPLOY') == "1" @@ -140,7 +141,7 @@ def non_repo_domain_tasks(self): logger.info("Executing non-repo domain tasks") - enabled_phase_names = get_enabled_phase_names_from_config() + enabled_phase_names = get_enabled_phase_names_from_config(engine, logger) enabled_tasks = [] @@ -175,7 +176,7 @@ def core_task_success_util_gen(repo_git, full_collection): primary_enabled_phases.append(core_task_success_util_gen) primary_gitlab_enabled_phases.append(core_task_success_util_gen) - primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=15, gitlab_phases=primary_gitlab_enabled_phases) + primary_request = CollectionRequest("core",primary_enabled_phases,max_repo=40, days_until_collect_again=days_until_collect_again, gitlab_phases=primary_gitlab_enabled_phases) primary_request.get_valid_repos(session) return primary_request @@ -193,7 +194,8 @@ def secondary_task_success_util_gen(repo_git, full_collection): return secondary_task_success_util.si(repo_git) secondary_enabled_phases.append(secondary_task_success_util_gen) - request = CollectionRequest("secondary",secondary_enabled_phases,max_repo=60, days_until_collect_again=10) + + request = CollectionRequest("secondary",secondary_enabled_phases,max_repo=60, days_until_collect_again=days_until_collect_again) request.get_valid_repos(session) return request @@ -215,7 +217,7 @@ def facade_task_update_weight_util_gen(repo_git, full_collection): facade_enabled_phases.append(facade_task_update_weight_util_gen) - request = CollectionRequest("facade",facade_enabled_phases,max_repo=30, days_until_collect_again=10) + request = CollectionRequest("facade",facade_enabled_phases,max_repo=30, days_until_collect_again=days_until_collect_again) request.get_valid_repos(session) return request @@ -230,7 +232,7 @@ def ml_task_success_util_gen(repo_git, full_collection): ml_enabled_phases.append(ml_task_success_util_gen) - request = CollectionRequest("ml",ml_enabled_phases,max_repo=5, days_until_collect_again=40) + request = CollectionRequest("ml",ml_enabled_phases,max_repo=5, days_until_collect_again=days_until_collect_again) request.get_valid_repos(session) return request @@ -245,25 +247,32 @@ def augur_collection_monitor(self): #Get list of enabled phases - enabled_phase_names = get_enabled_phase_names_from_config() + enabled_phase_names = get_enabled_phase_names_from_config(engine, logger) enabled_collection_hooks = [] with DatabaseSession(logger, self.app.engine) as session: + # Get config 
values for collection intervals + config = AugurConfig(logger, session) + core_interval = config.get_value('Tasks', 'core_collection_interval_days') or 15 + secondary_interval = config.get_value('Tasks', 'secondary_collection_interval_days') or 10 + facade_interval = config.get_value('Tasks', 'facade_collection_interval_days') or 10 + ml_interval = config.get_value('Tasks', 'ml_collection_interval_days') or 40 + if primary_repo_collect_phase.__name__ in enabled_phase_names: - enabled_collection_hooks.append(build_primary_repo_collect_request(session, logger, enabled_phase_names)) + enabled_collection_hooks.append(build_primary_repo_collect_request(session, logger, enabled_phase_names, core_interval)) if secondary_repo_collect_phase.__name__ in enabled_phase_names: - enabled_collection_hooks.append(build_secondary_repo_collect_request(session, logger, enabled_phase_names)) + enabled_collection_hooks.append(build_secondary_repo_collect_request(session, logger, enabled_phase_names, secondary_interval)) #start_secondary_collection(session, max_repo=10) if facade_phase.__name__ in enabled_phase_names: #start_facade_collection(session, max_repo=30) - enabled_collection_hooks.append(build_facade_repo_collect_request(session, logger, enabled_phase_names)) + enabled_collection_hooks.append(build_facade_repo_collect_request(session, logger, enabled_phase_names, facade_interval)) if not RUNNING_DOCKER and machine_learning_phase.__name__ in enabled_phase_names: - enabled_collection_hooks.append(build_ml_repo_collect_request(session, logger, enabled_phase_names)) + enabled_collection_hooks.append(build_ml_repo_collect_request(session, logger, enabled_phase_names, ml_interval)) #start_ml_collection(session,max_repo=5) logger.info(f"Starting collection phases: {[h.name for h in enabled_collection_hooks]}") diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py index bed73bd120..3f5ba75434 100644 --- a/augur/tasks/util/collection_util.py +++ b/augur/tasks/util/collection_util.py @@ -10,12 +10,13 @@ from augur.tasks.init.celery_app import celery_app as celery from augur.application.db.models import CollectionStatus, Repo from augur.application.db.util import execute_session_query -from augur.application.db.lib import get_section from augur.tasks.github.util.util import get_repo_weight_core, get_repo_weight_by_issue from augur.application.db import get_engine from augur.application.db.lib import execute_sql, get_session, get_active_repo_count, get_repo_by_repo_git from augur.tasks.util.worker_util import calculate_date_weight_from_timestamps from augur.tasks.util.collection_state import CollectionState +from augur.application.db.session import DatabaseSession +from augur.application.config import AugurConfig def get_list_of_all_users(): @@ -204,9 +205,19 @@ def get_repos_for_recollection(session, limit, hook, days_until_collect_again): return valid_repo_git_list -def get_enabled_phase_names_from_config(): +def get_enabled_phase_names_from_config(engine, logger): + with DatabaseSession(logger, engine) as session: + return get_enabled_phase_names_from_config_session(session, logger) - phase_options = get_section("Task_Routine") + +def get_enabled_phase_names_from_config_session(session, logger): + + config = AugurConfig(logger, session) + return get_enabled_phase_names_from_config_object(config) + + +def get_enabled_phase_names_from_config_object(config): + phase_options = config.get_section("Task_Routine") #Get list of enabled phases enabled_phase_names = [name for name, phase 
in phase_options.items() if phase == 1] @@ -597,37 +608,33 @@ def send_messages(self): for repo_git, full_collection in col_hook.repo_list: repo = get_repo_by_repo_git(repo_git) + platform_name = "github" + # this needs to be here and not up a level since it should be set/reset for each repo. + # otherwise a gitlab repo would reset it and cause subsequent github repos to use gitlab phases. + phases = None if "github" in repo.repo_git: - augur_collection_sequence = [] - for job in col_hook.phases: - #Add the phase to the sequence in order as a celery task. - #The preliminary task creates the larger task chain - augur_collection_sequence.append(job(repo_git, full_collection)) - - #augur_collection_sequence.append(core_task_success_util.si(repo_git)) - #Link all phases in a chain and send to celery - augur_collection_chain = chain(*augur_collection_sequence) - task_id = augur_collection_chain.apply_async().task_id - - self.logger.info(f"Setting github repo {col_hook.name} status to collecting for repo: {repo_git}") - - #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated - yield repo_git, task_id, col_hook.name - else: - if col_hook.gitlab_phases is not None: - - augur_collection_sequence = [] - for job in col_hook.gitlab_phases: - #Add the phase to the sequence in order as a celery task. - #The preliminary task creates the larger task chain - augur_collection_sequence.append(job(repo_git, full_collection)) - - #augur_collection_sequence.append(core_task_success_util.si(repo_git)) - #Link all phases in a chain and send to celery - augur_collection_chain = chain(*augur_collection_sequence) - task_id = augur_collection_chain.apply_async().task_id - - self.logger.info(f"Setting gitlab repo {col_hook.name} status to collecting for repo: {repo_git}") - - #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated - yield repo_git, task_id, col_hook.name + phases = col_hook.phases + # use default platform name + + elif "gitlab" in repo.repo_git: + platform_name = "gitlab" + if col_hook.gitlab_phases is None: + continue + phases = col_hook.gitlab_phases + + augur_collection_sequence = [] + for job in phases: + #Add the phase to the sequence in order as a celery task. + #The preliminary task creates the larger task chain + augur_collection_sequence.append(job(repo_git, full_collection)) + + #augur_collection_sequence.append(core_task_success_util.si(repo_git)) + #Link all phases in a chain and send to celery + augur_collection_chain = chain(*augur_collection_sequence) + task_id = augur_collection_chain.apply_async().task_id + + self.logger.info(f"Setting {platform_name} repo {col_hook.name} status to collecting for repo: {repo_git}") + + #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated + yield repo_git, task_id, col_hook.name + diff --git a/augur/tasks/util/random_key_auth.py b/augur/tasks/util/random_key_auth.py index f2fea35b36..dc59544aef 100644 --- a/augur/tasks/util/random_key_auth.py +++ b/augur/tasks/util/random_key_auth.py @@ -3,23 +3,19 @@ from httpx import Auth, Request, Response from random import choice - +import hashlib +from augur.util.keys import mask_key class RandomKeyAuth(Auth): - """Custom Auth class for httpx that randomly assigns an api key to each request + """Custom Auth class for httpx that randomly assigns an api key to each request. 
Attributes: - list_of_keys ([str]): list of keys which are randomly selected from on each request - header_name (str): name of header that the keys need to be set to + list_of_keys (List[str]): list of keys which are randomly selected from on each request + header_name (str): name of header that the keys need to be set to key_format (str): format string that defines the structure of the key and leaves a {} for the key to be inserted """ - - # pass a list of keys that are strings - # pass the name of the header that you would like to be set on the request - # Optionally pass the key_format. This is a string that contains a {} so the key can be added and applied to the header in the correct way. - # For example on github the keys are formatted like "token asdfasfdasf" where asdfasfdasf is the key. So for github - # the key_format="token {0}" + def __init__(self, list_of_keys: List[str], header_name: str, logger, key_format: Optional[str] = None): self.list_of_keys = list_of_keys self.header_name = header_name @@ -27,27 +23,26 @@ def __init__(self, list_of_keys: List[str], header_name: str, logger, key_format self.logger = logger def auth_flow(self, request: Request) -> Generator[Request, Response, None]: + """Attach a randomly selected API key to the request headers.""" + if not self.list_of_keys: + self.logger.error("No valid keys available to make a request.") + yield request + return + + key_value = choice(self.list_of_keys) - # the choice function is from the random library, and gets a random value from a list - # this gets a random key from the list - - if self.list_of_keys: - key_value = choice(self.list_of_keys) - self.logger.debug(f'Key value used in request: {key_value}') - # formats the key string into a format GitHub will accept - - if self.key_format: - key_string = self.key_format.format(key_value) - else: - key_string = key_value - - # set the headers of the request with the new key - request.headers[self.header_name] = key_string - #self.logger.info(f"List of Keys: {self.list_of_keys}") + # Log only masked or hashed form, never the full key + masked = mask_key(key_value) + self.logger.debug(f"Key used for request (masked): {masked}") + # Apply formatting if needed + if self.key_format: + key_string = self.key_format.format(key_value) else: - self.logger.error(f"There are no valid keys to make a request with: {self.list_of_keys}") + key_string = key_value - # sends the request back with modified headers + # Set header + request.headers[self.header_name] = key_string + # sends the request back with modified headers # basically it saves our changes to the request object yield request diff --git a/augur/templates/repo-info.j2 b/augur/templates/repo-info.j2 index 311daa45f7..2738d70e2a 100644 --- a/augur/templates/repo-info.j2 +++ b/augur/templates/repo-info.j2 @@ -5,21 +5,7 @@ {% if repo.repo_id %}

Report for: {{ repo.repo_name|title }}

{{ repo.repo_git }}

- {% for report in reports %} -

{{ report|replace("_", " ")|title }}

- {% for image in images[report] %} -
-
-
-
-
- -
-
- {% endfor %} - {% endfor %} + {% else %}

Repository {{ repo_id }} not found

{% endif %} diff --git a/augur/util/keys.py b/augur/util/keys.py new file mode 100644 index 0000000000..31ef63d0cb --- /dev/null +++ b/augur/util/keys.py @@ -0,0 +1,5 @@ +def mask_key(key: str, first: int = 6, last: int = 3, stars: int = 6) -> str: + """Mask key except for the first and last few characters.""" + if not isinstance(key, str) or len(key) <= (first + last): + return "*" * stars + return f"{key[:first]}{'*' * stars}{key[-last:]}" \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index b32f0a1696..f0ef41015b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -73,6 +73,7 @@ services: - REDIS_CONN_STRING=redis://redis:6379 - RABBITMQ_CONN_STRING=amqp://${AUGUR_RABBITMQ_USERNAME:-augur}:${AUGUR_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${AUGUR_RABBITMQ_VHOST:-augur_vhost} - CONFIG_LOCATION=/config/config.yml + - CONFIG_DATADIR=/config - CACHE_DATADIR=/cache - CACHE_LOCKDIR=/cache - CELERYBEAT_SCHEDULE_DB=/tmp/celerybeat-schedule.db diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 0a05daf848..5a8bfaaa3e 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -20,7 +20,9 @@ RUN go install github.com/ossf/scorecard/v5@v5.1.1 \ FROM python:3.11-slim-bullseye LABEL maintainer="outdoors@acm.org" -LABEL version="0.90.3" + +ARG VERSION +LABEL version=${VERSION} ENV DEBIAN_FRONTEND=noninteractive ENV PATH="/usr/bin/:/usr/local/bin:/usr/lib:${PATH}" diff --git a/docker/database/Dockerfile b/docker/database/Dockerfile index 6558fe44ec..e4393fe0b3 100644 --- a/docker/database/Dockerfile +++ b/docker/database/Dockerfile @@ -2,7 +2,9 @@ FROM postgres:16 LABEL maintainer="outdoors@acm.org" -LABEL version="0.90.3" + +ARG VERSION +LABEL version=${VERSION} ENV POSTGRES_DB="test" ENV POSTGRES_USER="augur" diff --git a/docker/empty_database/Dockerfile b/docker/empty_database/Dockerfile deleted file mode 100644 index d4e3122450..0000000000 --- a/docker/empty_database/Dockerfile +++ /dev/null @@ -1,66 +0,0 @@ -from postgres:16 AS builder - -ENV DEBIAN_FRONTEND=noninteractive - -# Install uv (https://docs.astral.sh/uv/guides/integration/docker/#installing-uv) -COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ -ENV UV_COMPILE_BYTECODE=1 -# The uv package cache will be on a cache volume, so can't be linked -ENV UV_LINK_MODE=copy -# Assert that the lockfile (uv.lock) is up-to-date. Use `uv lock` to update it -# manually if this fails the container build. -ENV UV_LOCKED=1 - -WORKDIR /augur - -COPY pyproject.toml . -COPY uv.lock . -COPY .python-version . - -# Install augur's dependencies early to take advantage of build cache -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --no-install-project --no-dev - -# Copy in the actual code -# The RUN line below ensure that permissions are set correctly. -# This is the equivalent of the following docker --chmod flags, but done in a way thats compatible with podman. -# This can be removed once https://github.com/containers/buildah/issues/6066 or relevant equivalent is fixed -# - u=rw,u+X: user can read and write all files/dirs and execute directories -# - go=r,go+X: group and others can read all files/dirs and execute directories -COPY README.md . -COPY LICENSE . -COPY alembic.ini . -COPY augur/ augur/ -COPY metadata.py . 
-COPY scripts/ scripts/ - -RUN find augur -type d -exec chmod u=rwx,go=rx {} + && find augur -type f -exec chmod u=rw,go=r {} + - -RUN find scripts -exec chmod u=rwx,go=rx {} + - -# Install the main project -RUN --mount=type=cache,target=/root/.cache/uv \ - uv sync --no-dev - -# We aren't going to activate the virtualenv (manually or via uv run), so we -# need adjust the PATH -ENV PATH="/augur/.venv/bin:${PATH}" - -ENV POSTGRES_DB="augur" -ENV POSTGRES_USER="augur" -ENV POSTGRES_PASSWORD="augur" -ENV AUGUR_DB="postgresql+psycopg2://augur:augur@localhost:5432/augur" -# ENV PGDATA="/var/lib/postgresql/data" - -RUN set -e && \ - gosu postgres initdb && \ - gosu postgres pg_ctl -D "$PGDATA" -o "-c listen_addresses='localhost'" -w start && \ - gosu postgres psql -c "CREATE USER ${POSTGRES_USER} WITH SUPERUSER PASSWORD '${POSTGRES_PASSWORD}';" && \ - gosu postgres psql -c "CREATE DATABASE ${POSTGRES_DB} OWNER ${POSTGRES_USER};" && \ - augur db create-schema && \ - gosu postgres pg_ctl -D "$PGDATA" -m fast -w stop - - -FROM postgres:16 - -COPY --from=builder /var/lib/postgresql/data /var/lib/postgresql/data diff --git a/docker/keyman/Dockerfile b/docker/keyman/Dockerfile index 72c46ba225..3fe1996223 100644 --- a/docker/keyman/Dockerfile +++ b/docker/keyman/Dockerfile @@ -1,7 +1,9 @@ FROM python:3.11.12-alpine LABEL maintainer="outdoors@acm.org" -LABEL version="0.90.3" + +ARG VERSION +LABEL version=${VERSION} RUN pip install --no-cache-dir --upgrade pip diff --git a/docker/rabbitmq/Dockerfile b/docker/rabbitmq/Dockerfile index ad86dfebb7..69babd3be8 100644 --- a/docker/rabbitmq/Dockerfile +++ b/docker/rabbitmq/Dockerfile @@ -1,7 +1,9 @@ -FROM rabbitmq:3.12-management-alpine +FROM rabbitmq:4.1-management-alpine LABEL maintainer="574/augur@simplelogin.com" -LABEL version="0.90.0" + +ARG VERSION +LABEL version=${VERSION} ARG RABBIT_MQ_DEFAULT_USER=augur ARG RABBIT_MQ_DEFAULT_PASSWORD=password123 diff --git a/docker/rabbitmq/definitions.json b/docker/rabbitmq/definitions.json index 1cd8cc172e..d5fd9faef2 100644 --- a/docker/rabbitmq/definitions.json +++ b/docker/rabbitmq/definitions.json @@ -1,5 +1,5 @@ { - "rabbit_version": "3.12", + "rabbit_version": "4.1", "users": [ { "name": "", diff --git a/docs/source/conf.py b/docs/source/conf.py index 45966f19ec..94921bd5d1 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -23,9 +23,11 @@ here = os.path.abspath(os.path.dirname(__file__)) -exec(open(os.path.join(here, "../../metadata.py")).read()) +# Add the project root (two levels up: docs/source → augur) +sys.path.insert(0, os.path.abspath(os.path.join(here, '../..'))) -sys.path.insert(0, os.path.abspath('../../../augur')) +# Now importing variables from metadata.py +from metadata import __copyright__, __release__, __version__ # -- General configuration ------------------------------------------------ diff --git a/docs/source/development-guide/create-a-metric/api-development.rst b/docs/source/development-guide/create-a-metric/api-development.rst index 834b42e8e0..05e1ebb977 100644 --- a/docs/source/development-guide/create-a-metric/api-development.rst +++ b/docs/source/development-guide/create-a-metric/api-development.rst @@ -133,10 +133,6 @@ There is also, generally, a block in a standard metric for pulling data by a rep 'begin_date': begin_date, 'end_date': end_date}) return results -Existing Visualization Metrics Files: --------------------------------------------- -1. augur/routes/contributor_reports.py -2. 
augur/routes/pull_request_reports.py Existing Metrics Files: -------------------------------------------- diff --git a/docs/source/docker/docker-compose.rst b/docs/source/docker/docker-compose.rst index d96476c392..1bc3a25a6f 100644 --- a/docs/source/docker/docker-compose.rst +++ b/docs/source/docker/docker-compose.rst @@ -26,7 +26,7 @@ This section of the documentation details how to use Augur's Docker Compose conf .. warning:: - Don't forget to provide your external database credentials in a file called ``.env`` file. Make sure the following environment variables are specified. + Don't forget to provide your external database credentials in a file called ``.env`` file. Make sure all the following environment variables are specified, keep placeholder values if you don't need some of them. Don't specify AUGUR_DB if you want the docker database to be used. Example .env: diff --git a/docs/source/docker/getting-started.rst b/docs/source/docker/getting-started.rst index e747bbb304..c413d9ed45 100644 --- a/docs/source/docker/getting-started.rst +++ b/docs/source/docker/getting-started.rst @@ -31,7 +31,7 @@ the following resources (or more): - 10 GB RAM Clone the Augur repository and create a .env file in the top level directory -with the following fields: +with the following fields (don't remove any variable, keep placeholder values if you don't need some of them): .. code:: python diff --git a/docs/source/docker/quick-start.rst b/docs/source/docker/quick-start.rst index b7d7b7cc7e..c7530f6ae9 100644 --- a/docs/source/docker/quick-start.rst +++ b/docs/source/docker/quick-start.rst @@ -9,7 +9,7 @@ Before you get off to such a quick start, go ahead and git checkout main - 4. Create a .env file in the top level directory with the following fields: + 4. Create a .env file in the top level directory with the following fields (don't remove any variable, keep placeholder values if you don't need some of them): .. code:: python diff --git a/docs/source/getting-started/Welcome.rst b/docs/source/getting-started/Welcome.rst index aaec004651..2808176bb5 100644 --- a/docs/source/getting-started/Welcome.rst +++ b/docs/source/getting-started/Welcome.rst @@ -7,12 +7,15 @@ Now, as a new member, it can be overwhelming to navigate and sift through all th A few first things to do: -1. Join the slack channel, that's the fastest way to join and be a part of the community. +1. Join the slack channel, that's the fastest way to join and be a part of the community. +You can join using this invite link: +`Join the CHAOSS Slack Workspace `_ .. image:: images/slack.jpg :width: 400 :alt: "Slack logo" - + :target: https://join.slack.com/t/chaoss-workspace/shared_invite/zt-3hmaf3urr-boLCd7nRgcAvvfbWcqJJVw + 2. Introduce yourself to the #newcomers channel. Say hi, it'd help others get to know you and point you in the right direction. In case you're unsure, here's a format you can use: * Name diff --git a/docs/source/getting-started/collecting-data.rst b/docs/source/getting-started/collecting-data.rst index ab7ce215c6..cb21922047 100644 --- a/docs/source/getting-started/collecting-data.rst +++ b/docs/source/getting-started/collecting-data.rst @@ -88,11 +88,17 @@ Celery Configuration **We strongly recommend leaving the default celery blocks generated by the installation process, but if you would like to know more, or fine-tune them to your needs, read on.** -The celery monitor is responsible for generating the tasks that will tell the other worker processes what data to collect, and how. 
The ``Celery`` block has 2 keys; one for memory cap and one for materialized views interval. -- ``worker_process_vmem_cap``, float between zero and one that determines the maximum percentage of total memory to use for worker processes +The celery monitor is responsible for generating the tasks that will tell the other worker processes what data to collect, and how. The ``Celery`` block has several keys: + +- ``core_worker_count``, the number of workers to spawn to run the core tasks. +- ``secondary_worker_count``, the number of workers to spawn to run the secondary tasks. +- ``facade_worker_count``, the number of workers to spawn to run the facade tasks. - ``refresh_materialized_views_interval_in_days``, number of days to wait between refreshes of materialized views. +If you choose, you can also adjust the values in the ``Tasks`` block if you would like to control when tasks should be re-run on a given repository. +This is specified as a number of days since the last successful run. + Adding repos for collection ----------------------------- diff --git a/docs/source/getting-started/command-line-interface/db.rst b/docs/source/getting-started/command-line-interface/db.rst index b754f2e067..a810f1b9d7 100644 --- a/docs/source/getting-started/command-line-interface/db.rst +++ b/docs/source/getting-started/command-line-interface/db.rst @@ -78,14 +78,14 @@ Example usage\: .. code-block:: bash # contents of repos.csv - 10,https://github.com/chaoss/augur.git - 10,https://github.com/chaoss/grimoirelab.git - 20,https://github.com/chaoss/wg-evolution.git - 20,https://github.com/chaoss/wg-risk.git - 20,https://github.com/chaoss/wg-common.git - 20,https://github.com/chaoss/wg-value.git - 20,https://github.com/chaoss/wg-diversity-inclusion.git - 20,https://github.com/chaoss/wg-app-ecosystem.git + https://github.com/chaoss/augur.git,10 + https://github.com/chaoss/grimoirelab.git,10 + https://github.com/chaoss/wg-evolution.git,20 + https://github.com/chaoss/wg-risk.git,20 + https://github.com/chaoss/wg-common.git,20 + https://github.com/chaoss/wg-value.git,20 + https://github.com/chaoss/wg-diversity-inclusion.git,20 + https://github.com/chaoss/wg-app-ecosystem.git,20 # to add repos to the database $ augur db add-repos repos.csv diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index d2a79c4f71..41bc1be4dc 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -31,9 +31,7 @@ Required: -**Python 3.9 is not yet supported because TensorFlow, which we use in our machine learning workers, does not yet support Python 3.9.** - -Our REST API & data collection workers write in Python 3.6. We query the GitHub & GitLab API to collect data about issues, pull requests, contributors, and other information about a repository, so GitLab and GitHub access tokens are **required** for data collection. +Our REST API & data collection workers query the GitHub & GitLab API to collect data about issues, pull requests, contributors, and other information about a repository. Values for GitLab and GitHub access tokens are **required** for data collection and must be provided (an invalid token can be provided if you don't plan to use one platform) . 
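To connect the ``Tasks`` re-collection intervals described in the collection docs above with the code in this change, the values are read at runtime roughly as sketched below. The key names and fallback values match augur/tasks/start_tasks.py from this diff; treat the snippet as an illustration rather than the canonical pattern:

import logging
from augur.application.config import AugurConfig
from augur.application.db.session import DatabaseSession

logger = logging.getLogger(__name__)

with DatabaseSession(logger) as session:
    config = AugurConfig(logger, session)
    # Fallbacks mirror the defaults used in start_tasks.py when a key is unset.
    core_interval = config.get_value('Tasks', 'core_collection_interval_days') or 15
    secondary_interval = config.get_value('Tasks', 'secondary_collection_interval_days') or 10
    facade_interval = config.get_value('Tasks', 'facade_collection_interval_days') or 10
    ml_interval = config.get_value('Tasks', 'ml_collection_interval_days') or 40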
Optional: diff --git a/docs/source/getting-started/using-docker.rst b/docs/source/getting-started/using-docker.rst index cc5e23896c..c1c693eda2 100644 --- a/docs/source/getting-started/using-docker.rst +++ b/docs/source/getting-started/using-docker.rst @@ -10,7 +10,7 @@ the following resources (or more). 1. Clone the Augur repository https://github.com/chaoss/augur -2. Create a .env file in the top level directory with the following fields: +2. Create a ``.env`` file in the top level directory with the following fields (don't remove any variables; keep placeholder values for any you don't need): .. code:: python @@ -35,7 +35,7 @@ or podman compose up --build -And augur should be up and running! Over time, you may decide that you want to download and run newer releases of Augur. It is critical that your `.env` file remains configured to use the same database name and password; though you can change the password if you understand how to connect to a database running inside a Docker container on your computer. +And Augur should be up and running! Over time, you may decide that you want to download and run newer releases of Augur. It is critical that your ``.env`` file remains configured to use the same database name and password; though you can change the password if you understand how to connect to a database running inside a Docker container on your computer. Rebuilding Augur in Docker ---------------------------- diff --git a/docs/source/procedures/creating-releases.rst b/docs/source/procedures/creating-releases.rst index 007db72441..7ef2b32a9c 100644 --- a/docs/source/procedures/creating-releases.rst +++ b/docs/source/procedures/creating-releases.rst @@ -2,49 +2,109 @@ The Augur Release Process ========================= The first step to releasing any changes is to have changes in the first place. -Augur's `CONTRIBUTING.md `__ file contains all the information that is needed to get started with topics like reporting issues, contributing code, and understanding the code review process. +Augur's `CONTRIBUTING.md `__ file +contains all the information that is needed to get started with topics like +reporting issues, contributing code, and understanding the code review process. This document outlines how these changes end up in an Augur release after they are merged into the `main` branch. +Release Workflow +---------------- - -Release workflow: -Starting after version 0.89.3, Augur follows a workflow similar to those you may already be familiar with (such as github flow and git flow). The Augur workflow has two long-lived branches, `main` and `release` and is designed such that changes only flow in one direction - from main into release. +Starting after version **0.89.3**, Augur follows a workflow similar to those you may already +be familiar with (such as GitHub Flow and Git Flow). The Augur workflow has two long-lived branches, +`main` and `release`, and is designed such that changes only flow in one direction — from `main` into `release`. Branches +-------- + +**main** -The `main` branch is the primary development branch that is the target for all new pull requests. At any given point in time, this branch represents the best approximation of what the next upcoming release will look like. Since this is the active development branch, changes happen more frequently and this branch should be considered to be less stable than the `release` branch due to the possibility of breaking changes being made (and potentially reverted) between releases.
It is not recommended for production deployment and is primarily intended for use by Augur contributors running their own copies against test data for development purposes. +The `main` branch is the primary development branch that is the target for all new pull requests. +At any given point in time, this branch represents the best approximation of what the next upcoming +release will look like. Since this is the active development branch, changes happen more frequently +and this branch should be considered to be less stable than the `release` branch due to the possibility +of breaking changes being made (and potentially reverted) between releases. It is not recommended for +production deployment and is primarily intended for use by Augur contributors running their own copies +against test data for development purposes. -`release` -The `release` branch is where all augur versions (after 0.89.3) are tagged. Each commit on this branch represents either a hotfix to the prior release or a new major or minor version. +**release** -Currently, Augur only officially supports the last-released version represented by the latest **release** tag. In most cases, the latest commit on the `release` branch is made immediately prior to a release, but always rely on the latest tagged release, not the release branch in production. +The `release` branch is where all Augur versions (after 0.89.3) are tagged. Each commit on this branch +represents either a hotfix to the prior release or a new major or minor version. + +Currently, Augur only officially supports the last-released version represented by the latest **release** tag. +In most cases, the latest commit on the `release` branch is made immediately prior to a release, but always rely +on the latest tagged release, not the `release` branch in production. .. note:: - If future needs require supporting multiple Augur versions concurrently, individual numbered release branches may be made from this central `release`` branch to allow any hotfixes to be applied to each supported version independently of the others. + If future needs require supporting multiple Augur versions concurrently, individual numbered + release branches may be made from this central `release` branch to allow any hotfixes to be applied + to each supported version independently of the others. The Release Process +------------------- + +When the next release is set to be cut, some preparation steps need to take place first. These include: + +- Ensuring all features planned for that release are merged, and any unrelated changes are delayed (as appropriate) until after the release. +- Creating a Pull Request to update any applicable metadata (such as version information and changelogs) on the `main` branch. + +Version Management (Updated) +---------------------------- + +Starting from version **0.90.0**, Augur now uses a **single source of truth** for its version information, +defined in `metadata.py`. + +Previously, the version number needed to be manually updated in several different places during a release, including: -When the next release is set to be cut, some preparation steps need to take place first, these include: -- Ensuring all features planned for that release are merged and any unrelated changes are delayed (as appropriate) until after the release. -- Creating a Pull Request to update any applicable metadata (such as version information and Changelogs) on the `main` branch. 
+- ``pyproject.toml`` (for Python packaging) +- Dockerfiles (used for building and tagging images) +- GitHub Actions workflow files (e.g., ``.github/workflows/build_docker.yml``) +- Any scripts or documentation pages referencing specific versions -Once all release preparation has been completed, a new Pull Request can be created to merge the main branch into the `release 'branch. This creates a final review opportunity and allows for another run of (potentially more stringent) CI jobs compared to those run on `main`, catching issues that may have come up throughout the various merges or in the process of preparing for release. +This manual process increased the chance of version mismatches between code, Docker images, and releases. -After this PR is merged, a tag is created that points to the commit on the `release` branch, effectively labeling it so that it can be returned to later if needed. This labeling process can also be the basis for additional CI jobs that build and upload the released code to distribution platforms such as Docker Hub or the GitHub Container Registry +Now, this has been **fully centralized**: + +- The version number is declared once in ``metadata.py`` as ``__version__``. +- A helper script ``get_version.py`` reads this value and dynamically injects it into Docker builds via a build argument. +- The CI/CD pipeline (GitHub Actions) also reads the same version from ``metadata.py`` when tagging builds and Docker images. + +This ensures that all parts of Augur — including Python packaging, Docker images, and release artifacts — +use the **exact same version**, automatically. + +Therefore, before tagging a new release, only the version in ``metadata.py`` needs to be updated. +All other build and deployment steps automatically consume this version during the release process. + +Once all release preparation has been completed, a new Pull Request can be created to merge the `main` +branch into the `release` branch. This creates a final review opportunity and allows for another run of +(potentially more stringent) CI jobs compared to those run on `main`, catching issues that may have come up +throughout the various merges or during the process of preparing for release. + +After this PR is merged, a tag is created that points to the commit on the `release` branch, +effectively labeling it so that it can be returned to later if needed. This labeling process can +also be the basis for additional CI jobs that build and upload the released code to distribution +platforms such as Docker Hub or the GitHub Container Registry. Why? +---- This is done to solve a number of problems: -- having changes moving in two directions at once (i.e. features coming from main, and hotfixes coming from release) was often confusing and increased the odds that a change would be missed, such as being shipped as a hotfix but not merged into the main codebase - leading to a regression in the next release. +- Having changes moving in two directions at once (i.e. features coming from `main`, and hotfixes coming from `release`) + was often confusing and increased the odds that a change would be missed, such as being shipped as a hotfix + but not merged into the main codebase — leading to a regression in the next release. 
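To make the centralized-version flow above concrete: a helper in the spirit of ``get_version.py`` only needs to read ``__version__`` out of ``metadata.py`` and print it, so that CI and Docker builds can pass the same value along as a build argument. The sketch below is an assumption about what such a helper might look like, not the actual script.

.. code-block:: python

    # Hypothetical sketch of a get_version.py-style helper; the real script may differ.
    # It evaluates metadata.py in an isolated namespace (no package import required)
    # and prints __version__ for use as a Docker build argument or CI tag.
    from pathlib import Path


    def read_version(metadata_path: str = "metadata.py") -> str:
        namespace: dict = {}
        exec(compile(Path(metadata_path).read_text(), metadata_path, "exec"), namespace)
        return namespace["__version__"]


    if __name__ == "__main__":
        print(read_version())

One possible invocation, again only illustrative, would be ``docker build --build-arg VERSION="$(python get_version.py)" ...``, mirroring how the GitHub Actions pipeline described above injects the version read from ``metadata.py`` into each image build.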
+ + +Special Case: Hotfixes +---------------------- +If the fix is a hotfix: -Special case: Hotfixes -if the fix was a hotfix: -- changelog updates and other metadata changes should be included as part of the PR -- this is where mergeify or something helps re-create the PR targeting the release branch directly. at which point the release process is followed +- Changelog updates and other metadata changes should be included as part of the PR. +- This is where tools like **Mergeify** can help re-create the PR targeting the `release` branch directly, + at which point the regular release process is followed. diff --git a/pyproject.toml b/pyproject.toml index c086babe25..ddaed4301d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,12 +95,21 @@ dependencies = [ [dependency-groups] dev = [ - "tox==3.24.4", - "pytest==6.2.5", - "toml>=0.10.2", - "ipdb==0.13.9", - {include-group = "docs"}, + { include-group = "lint" }, + { include-group = "test" }, + { include-group = "debug" }, + { include-group = "docs" }, ] +lint = [ + "pylint", + "mypy>=1.18.2", + "types-requests>=2.31.0.6", + "types-pyyaml>=6.0.12.20250915", + "types-python-dateutil>=2.9.0.20251008", + "types-toml>=0.10.8.20240310", +] +test = ["tox>=4.0", "tox-uv", "pytest"] +debug = ["ipdb==0.13.9"] docs = [ "docutils==0.20.1", # setuptools is needed for pkg_resources due to sphinxcontrib-redoc @@ -149,3 +158,37 @@ constraint-dependencies = [ # https://docs.python.org/3.10/whatsnew/3.10.html#removed "graphql-server-core>1.1.1", ] + +[tool.tox] +legacy_tox_ini = """ + [tox] + envlist = py{,311}-{classes} + skip_missing_interpreters = true + requires = tox-uv + + [testenv] + passenv = AUGUR_* + whitelist_externals = make + /bin/bash + deps = + pytest + setenv = + AUGUR_LOG_DEBUG = 0 + AUGUR_LOG_QUIET = 1 + commands = + application: pytest tests --ignore=tests/test_routes --ignore=tests/test_workers + metric-routes: python tests/test_routes/runner.py + workers: pytest tests/test_workers/ + classes: pytest tests/test_classes/ + worker-persistance: pytest test/test_workers/worker_persistance/ + + [pytest] + addopts = -ra -s +""" + +[tool.mypy] +files = ['augur/application/db/*.py'] +ignore_missing_imports = true +follow_imports = "skip" +disallow_untyped_defs = false +exclude_gitignore = true diff --git a/tests/test_applicaton/test_config/test_config.py b/tests/test_applicaton/test_config/test_config.py index b6b69f8914..2194b122fc 100644 --- a/tests/test_applicaton/test_config/test_config.py +++ b/tests/test_applicaton/test_config/test_config.py @@ -168,11 +168,9 @@ def test_config_is_section_in_config(test_db_config, test_db_engine): def test_config_add_settings(test_db_config, test_db_engine): try: - ip_standard = {"section_name": "Network", "setting_name": "ip_standard", "value": "ipv4"} - subnet_mask = {"section_name": "Network", "setting_name": "subnet_mask", "value": "/24"} - settings = [ip_standard, subnet_mask] - test_db_config.add_or_update_settings(settings) + test_db_config.add_value("Network", "ip_standard", "ipv4") + test_db_config.add_value("Network", "subnet_mask", "/24") with test_db_engine.connect() as connection: @@ -208,12 +206,7 @@ def test_config_update_settings(test_db_config, test_db_engine): new_ip = "1.1.1.1" new_subnet_mask = "/16" - ip_standard_updated["value"] = new_ip_standard - ip_updated["value"] = new_ip - subnet_mask_updated["value"] = new_subnet_mask - all_data = [ip_standard, ip, subnet_mask] - updated_settings = [ip_standard_updated, ip_updated, subnet_mask_updated] with test_db_engine.connect() as 
connection: @@ -223,7 +216,9 @@ def test_config_update_settings(test_db_config, test_db_engine): connection.execute(query, **data) - test_db_config.add_or_update_settings(updated_settings) + test_db_config.add_value("Network", "ip_standard", new_ip_standard) + test_db_config.add_value("Network", "ip", new_ip) + test_db_config.add_value("Network", "subnet_mask", new_subnet_mask) with test_db_engine.connect() as connection: @@ -404,39 +399,6 @@ def test_remove_section(test_db_config, test_db_engine): -def test_create_default_config(test_db_config, test_db_engine): - - from augur.application.config import default_config - - test_db_config.create_default_config() - - config_sections = list(default_config.keys()) - - try: - - with test_db_engine.connect() as connection: - - result = connection.execute("""SELECT * FROM augur_operations.config""").fetchall() - - assert result is not None - assert len(result) > 0 - - result_sections = [] - for row in result: - dict_data = dict(row) - - if dict_data["section_name"] not in result_sections: - result_sections.append(dict_data["section_name"]) - - assert dict_data["section_name"] and dict_data["setting_name"] - - assert len(config_sections) == len(result_sections) - - finally: - with test_db_engine.connect() as connection: - connection.execute("""DELETE FROM augur_operations.config""") - - diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py new file mode 100644 index 0000000000..69fe19017f --- /dev/null +++ b/tests/test_classes/test_config_stores.py @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: MIT +import pytest +from unittest.mock import Mock + +from augur.application.config import JsonConfig, DatabaseConfig, NotWriteableException, AugurConfig, default_config + + +@pytest.fixture +def mock_logger(): + return Mock() + +@pytest.fixture +def mock_session(): + return Mock() + + +def test_jsonconfig_readonly_flags(mock_logger): + cfg = JsonConfig({"A": {"x": 1}}, mock_logger) + assert cfg.writable is False + assert cfg.empty is False + + +def test_jsonconfig_empty_true_false(mock_logger): + assert JsonConfig({}, mock_logger).empty is True + assert JsonConfig({"A": {}}, mock_logger).empty is False + + +def test_jsonconfig_retrieve_has_get(mock_logger): + data = {"Alpha": {"a": 1, "b": "str"}, "Beta": {}} + cfg = JsonConfig(data, mock_logger) + + # retrieve full dict + assert cfg.retrieve_dict() is data + + # has/get section + assert cfg.has_section("Alpha") is True + assert cfg.has_section("Missing") is False + assert cfg.get_section("Alpha") == {"a": 1, "b": "str"} + assert cfg.get_section("Missing") is None + + # has/get value + assert cfg.has_value("Alpha", "a") is True + assert cfg.has_value("Alpha", "missing") is False + assert cfg.has_value("Missing", "a") is False + assert cfg.get_value("Alpha", "a") == 1 + assert cfg.get_value("Alpha", "missing") is None + assert cfg.get_value("Missing", "a") is None + + +@pytest.mark.parametrize( + "callable_name, args, kwargs", + [ + ("load_dict", ({"X": {"y": 2}},), {"ignore_existing": False}), + ("clear", tuple(), {}), + ("remove_section", ("X",), {}), + ("create_section", ("X", {"y": 2}), {"ignore_existing": False}), + ("remove_value", ("X", "y"), {}), + ("add_value", ("X", "y", 2), {"ignore_existing": False}), + ], +) +def test_jsonconfig_mutations_raise_not_writable(mock_logger, callable_name, args, kwargs): + cfg = JsonConfig({"A": {"x": 1}}, mock_logger) + with pytest.raises(NotWriteableException): + getattr(cfg, callable_name)(*args, **kwargs) + + +def 
test_dict_to_config_table_happy_path(): + input_dict = { + "Section1": {"alpha": 1, "beta": "x"}, + "Section2": {"gamma": False, "delta": 3.14}, + } + + rows = DatabaseConfig._dict_to_config_table(input_dict) + + # Expect a list of row dicts with section_name, setting_name, value + assert isinstance(rows, list) + expected = [ + { + "section_name": "Section1", + "setting_name": "alpha", + "value": 1, + "type": "int" + }, + { + "section_name": "Section1", + "setting_name": "beta", + "value": "x", + "type": "str" + }, + { + "section_name": "Section2", + "setting_name": "gamma", + "value": False, + "type": "bool" + }, + { + "section_name": "Section2", + "setting_name": "delta", + "value": 3.14, + "type": "float" + }, + ] + assert rows == expected + + + +def test_fetching_real_defaults(mock_logger, mock_session): + cfg = AugurConfig(mock_logger, mock_session) + cfg.config_sources = [JsonConfig(default_config, mock_logger)] + + assert cfg.get_value("Redis", "cache_group") == 0 diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv index 8967ae2142..fb537d4949 100644 --- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv +++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv @@ -1,8 +1,8 @@ -10,https://github.com/chaoss/augur.git -10,https://github.com/chaoss/grimoirelab.git -20,https://github.com/chaoss/wg-evolution.git -20,https://github.com/chaoss/wg-risk.git -20,https://github.com/chaoss/wg-common.git -20,https://github.com/chaoss/wg-value.git -20,https://github.com/chaoss/wg-diversity-inclusion.git -20,https://github.com/chaoss/wg-app-ecosystem.git \ No newline at end of file +https://github.com/chaoss/augur.git,10 +https://github.com/chaoss/grimoirelab.git,10 +https://github.com/chaoss/wg-evolution.git,20 +https://github.com/chaoss/wg-risk.git,20 +https://github.com/chaoss/wg-common.git,20 +https://github.com/chaoss/wg-value.git,20 +https://github.com/chaoss/wg-diversity-inclusion.git,20 +https://github.com/chaoss/wg-app-ecosystem.git,20 diff --git a/tox.ini b/tox.ini deleted file mode 100644 index ceeb03155b..0000000000 --- a/tox.ini +++ /dev/null @@ -1,27 +0,0 @@ -#SPDX-License-Identifier: MIT -# tox (https://tox.readthedocs.io/) is a tool for running tests -# in multiple virtualenvs. This configuration file will run the -# test suite on all supported python versions. To use it, "pip install tox" -# and then run "tox" from this directory. 
- -[tox] -envlist = py{,36,37,38}-{metric-routes,application, workers} -skip_missing_interpreters = true - -[testenv] -passenv = AUGUR_* -whitelist_externals = make - /bin/bash -deps = - pytest -setenv = - AUGUR_LOG_DEBUG = 0 - AUGUR_LOG_QUIET = 1 -commands = - application: pytest tests --ignore=tests/test_routes --ignore=tests/test_workers - metric-routes: python tests/test_routes/runner.py - workers: pytest tests/test_workers/ - worker-persistance: pytest test/test_workers/worker_persistance/ - -[pytest] -addopts = -ra -s diff --git a/uv.lock b/uv.lock index 1c74a61de7..37df99ba32 100644 --- a/uv.lock +++ b/uv.lock @@ -215,17 +215,28 @@ dependencies = [ ] [package.dev-dependencies] +debug = [ + { name = "ipdb" }, +] dev = [ { name = "docutils" }, { name = "ipdb" }, + { name = "mypy" }, + { name = "pylint" }, { name = "pytest" }, { name = "setuptools" }, { name = "sphinx" }, { name = "sphinx-rtd-theme" }, { name = "sphinxcontrib-openapi" }, { name = "sphinxcontrib-redoc" }, - { name = "toml" }, - { name = "tox" }, + { name = "tox", version = "4.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "tox", version = "4.32.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tox-uv", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "tox-uv", version = "1.29.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "types-python-dateutil" }, + { name = "types-pyyaml" }, + { name = "types-requests" }, + { name = "types-toml" }, ] docs = [ { name = "docutils" }, @@ -235,6 +246,21 @@ docs = [ { name = "sphinxcontrib-openapi" }, { name = "sphinxcontrib-redoc" }, ] +lint = [ + { name = "mypy" }, + { name = "pylint" }, + { name = "types-python-dateutil" }, + { name = "types-pyyaml" }, + { name = "types-requests" }, + { name = "types-toml" }, +] +test = [ + { name = "pytest" }, + { name = "tox", version = "4.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "tox", version = "4.32.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tox-uv", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "tox-uv", version = "1.29.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] [package.metadata] requires-dist = [ @@ -313,17 +339,24 @@ requires-dist = [ ] [package.metadata.requires-dev] +debug = [{ name = "ipdb", specifier = "==0.13.9" }] dev = [ { name = "docutils", specifier = "==0.20.1" }, { name = "ipdb", specifier = "==0.13.9" }, - { name = "pytest", specifier = "==6.2.5" }, + { name = "mypy", specifier = ">=1.18.2" }, + { name = "pylint" }, + { name = "pytest" }, { name = "setuptools" }, { name = "sphinx", specifier = "==7.2.6" }, { name = "sphinx-rtd-theme", specifier = "==2.0.0" }, { name = "sphinxcontrib-openapi", specifier = "==0.8.3" }, { name = "sphinxcontrib-redoc", specifier = "==1.6.0" }, - { name = "toml", specifier = ">=0.10.2" }, - { name = "tox", specifier = "==3.24.4" }, + { name = "tox", specifier = ">=4.0" }, + { name = "tox-uv" }, + { name = "types-python-dateutil", specifier = ">=2.9.0.20251008" }, + { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, + { name = "types-requests", specifier = 
">=2.31.0.6" }, + { name = "types-toml", specifier = ">=0.10.8.20240310" }, ] docs = [ { name = "docutils", specifier = "==0.20.1" }, @@ -333,6 +366,19 @@ docs = [ { name = "sphinxcontrib-openapi", specifier = "==0.8.3" }, { name = "sphinxcontrib-redoc", specifier = "==1.6.0" }, ] +lint = [ + { name = "mypy", specifier = ">=1.18.2" }, + { name = "pylint" }, + { name = "types-python-dateutil", specifier = ">=2.9.0.20251008" }, + { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, + { name = "types-requests", specifier = ">=2.31.0.6" }, + { name = "types-toml", specifier = ">=0.10.8.20240310" }, +] +test = [ + { name = "pytest" }, + { name = "tox", specifier = ">=4.0" }, + { name = "tox-uv" }, +] [[package]] name = "babel" @@ -434,11 +480,27 @@ sdist = { url = "https://files.pythonhosted.org/packages/10/ed/7e8b97591f6f45617 name = "cachetools" version = "5.5.2" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload-time = "2025-02-20T21:01:19.524Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" }, ] +[[package]] +name = "cachetools" +version = "6.2.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/7e/b975b5814bd36faf009faebe22c1072a1fa1168db34d285ef0ba071ad78c/cachetools-6.2.1.tar.gz", hash = "sha256:3f391e4bd8f8bf0931169baf7456cc822705f4e2a31f840d218f445b9a854201", size = 31325, upload-time = "2025-10-12T14:55:30.139Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/c5/1e741d26306c42e2bf6ab740b2202872727e0f606033c9dd713f8b93f5a8/cachetools-6.2.1-py3-none-any.whl", hash = "sha256:09868944b6dde876dfd44e1d47e18484541eaf12f26f29b7af91b26cc892d701", size = 11280, upload-time = "2025-10-12T14:55:28.382Z" }, +] + [[package]] name = "celery" version = "5.5.3" @@ -467,6 +529,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/ae/320161bd181fc06471eed047ecce67b693fd7515b16d495d8932db763426/certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057", size = 157650, upload-time = "2025-06-15T02:45:49.977Z" }, ] +[[package]] +name = "chardet" +version = "5.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.2" @@ -850,11 +921,27 @@ wheels = [ 
name = "filelock" version = "3.18.0" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload-time = "2025-03-14T07:11:40.47Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" }, ] +[[package]] +name = "filelock" +version = "3.20.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/58/46/0028a82567109b5ef6e4d2a1f04a583fb513e6cf9527fcdd09afd817deeb/filelock-3.20.0.tar.gz", hash = "sha256:711e943b4ec6be42e1d4e6690b48dc175c822967466bb31c0c293f34334c13f4", size = 18922, upload-time = "2025-10-08T18:03:50.056Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/91/7216b27286936c16f5b4d0c530087e4a54eead683e6b0b73dd0c64844af6/filelock-3.20.0-py3-none-any.whl", hash = "sha256:339b4732ffda5cd79b13f4e2711a31b0365ce445d95d243bb996273d072546a2", size = 16054, upload-time = "2025-10-08T18:03:48.35Z" }, +] + [[package]] name = "flask" version = "2.0.2" @@ -1035,22 +1122,44 @@ wheels = [ name = "google-auth" version = "2.40.3" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] dependencies = [ - { name = "cachetools" }, - { name = "pyasn1-modules" }, - { name = "rsa" }, + { name = "cachetools", version = "5.5.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "pyasn1-modules", marker = "python_full_version < '3.11'" }, + { name = "rsa", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/9e/9b/e92ef23b84fa10a64ce4831390b7a4c2e53c0132568d99d4ae61d04c8855/google_auth-2.40.3.tar.gz", hash = "sha256:500c3a29adedeb36ea9cf24b8d10858e152f2412e3ca37829b3fa18e33d63b77", size = 281029, upload-time = "2025-06-04T18:04:57.577Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/17/63/b19553b658a1692443c62bd07e5868adaa0ad746a0751ba62c59568cd45b/google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca", size = 216137, upload-time = "2025-06-04T18:04:55.573Z" }, ] +[[package]] +name = "google-auth" +version = "2.42.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "cachetools", version = "6.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pyasn1-modules", marker = "python_full_version >= '3.11'" }, + { name = "rsa", marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/6b/22a77135757c3a7854c9f008ffed6bf4e8851616d77faf13147e9ab5aae6/google_auth-2.42.1.tar.gz", hash = "sha256:30178b7a21aa50bffbdc1ffcb34ff770a2f65c712170ecd5446c4bef4dc2b94e", size = 295541, upload-time = "2025-10-30T16:42:19.381Z" } +wheels = [ + { url 
= "https://files.pythonhosted.org/packages/92/05/adeb6c495aec4f9d93f9e2fc29eeef6e14d452bba11d15bdb874ce1d5b10/google_auth-2.42.1-py2.py3-none-any.whl", hash = "sha256:eb73d71c91fc95dbd221a2eb87477c278a355e7367a35c0d84e6b0e5f9b4ad11", size = 222550, upload-time = "2025-10-30T16:42:17.878Z" }, +] + [[package]] name = "google-auth-oauthlib" version = "1.2.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "google-auth" }, + { name = "google-auth", version = "2.40.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "google-auth", version = "2.42.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "requests-oauthlib" }, ] sdist = { url = "https://files.pythonhosted.org/packages/fb/87/e10bf24f7bcffc1421b84d6f9c3377c30ec305d082cd737ddaa6d8f77f7c/google_auth_oauthlib-1.2.2.tar.gz", hash = "sha256:11046fb8d3348b296302dd939ace8af0a724042e8029c1b872d87fabc9f41684", size = 20955, upload-time = "2025-04-22T16:40:29.172Z" } @@ -2016,6 +2125,60 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ca/91/7dc28d5e2a11a5ad804cf2b7f7a5fcb1eb5a4966d66a5d2b41aee6376543/msgpack-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:6d489fba546295983abd142812bda76b57e33d0b9f5d5b71c09a583285506f69", size = 72341, upload-time = "2025-06-13T06:52:27.835Z" }, ] +[[package]] +name = "mypy" +version = "1.18.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mypy-extensions" }, + { name = "pathspec" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/77/8f0d0001ffad290cef2f7f216f96c814866248a0b92a722365ed54648e7e/mypy-1.18.2.tar.gz", hash = "sha256:06a398102a5f203d7477b2923dda3634c36727fa5c237d8f859ef90c42a9924b", size = 3448846, upload-time = "2025-09-19T00:11:10.519Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/6f/657961a0743cff32e6c0611b63ff1c1970a0b482ace35b069203bf705187/mypy-1.18.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eab0cf6294dafe397c261a75f96dc2c31bffe3b944faa24db5def4e2b0f77c", size = 12807973, upload-time = "2025-09-19T00:10:35.282Z" }, + { url = "https://files.pythonhosted.org/packages/10/e9/420822d4f661f13ca8900f5fa239b40ee3be8b62b32f3357df9a3045a08b/mypy-1.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a780ca61fc239e4865968ebc5240bb3bf610ef59ac398de9a7421b54e4a207e", size = 11896527, upload-time = "2025-09-19T00:10:55.791Z" }, + { url = "https://files.pythonhosted.org/packages/aa/73/a05b2bbaa7005f4642fcfe40fb73f2b4fb6bb44229bd585b5878e9a87ef8/mypy-1.18.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448acd386266989ef11662ce3c8011fd2a7b632e0ec7d61a98edd8e27472225b", size = 12507004, upload-time = "2025-09-19T00:11:05.411Z" }, + { url = "https://files.pythonhosted.org/packages/4f/01/f6e4b9f0d031c11ccbd6f17da26564f3a0f3c4155af344006434b0a05a9d/mypy-1.18.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f9e171c465ad3901dc652643ee4bffa8e9fef4d7d0eece23b428908c77a76a66", size = 13245947, upload-time = "2025-09-19T00:10:46.923Z" }, + { url = "https://files.pythonhosted.org/packages/d7/97/19727e7499bfa1ae0773d06afd30ac66a58ed7437d940c70548634b24185/mypy-1.18.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:592ec214750bc00741af1f80cbf96b5013d81486b7bb24cb052382c19e40b428", size = 13499217, upload-time = "2025-09-19T00:09:39.472Z" }, + { url = "https://files.pythonhosted.org/packages/9f/4f/90dc8c15c1441bf31cf0f9918bb077e452618708199e530f4cbd5cede6ff/mypy-1.18.2-cp310-cp310-win_amd64.whl", hash = "sha256:7fb95f97199ea11769ebe3638c29b550b5221e997c63b14ef93d2e971606ebed", size = 9766753, upload-time = "2025-09-19T00:10:49.161Z" }, + { url = "https://files.pythonhosted.org/packages/88/87/cafd3ae563f88f94eec33f35ff722d043e09832ea8530ef149ec1efbaf08/mypy-1.18.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:807d9315ab9d464125aa9fcf6d84fde6e1dc67da0b6f80e7405506b8ac72bc7f", size = 12731198, upload-time = "2025-09-19T00:09:44.857Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e0/1e96c3d4266a06d4b0197ace5356d67d937d8358e2ee3ffac71faa843724/mypy-1.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:776bb00de1778caf4db739c6e83919c1d85a448f71979b6a0edd774ea8399341", size = 11817879, upload-time = "2025-09-19T00:09:47.131Z" }, + { url = "https://files.pythonhosted.org/packages/72/ef/0c9ba89eb03453e76bdac5a78b08260a848c7bfc5d6603634774d9cd9525/mypy-1.18.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1379451880512ffce14505493bd9fe469e0697543717298242574882cf8cdb8d", size = 12427292, upload-time = "2025-09-19T00:10:22.472Z" }, + { url = "https://files.pythonhosted.org/packages/1a/52/ec4a061dd599eb8179d5411d99775bec2a20542505988f40fc2fee781068/mypy-1.18.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1331eb7fd110d60c24999893320967594ff84c38ac6d19e0a76c5fd809a84c86", size = 13163750, upload-time = "2025-09-19T00:09:51.472Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5f/2cf2ceb3b36372d51568f2208c021870fe7834cf3186b653ac6446511839/mypy-1.18.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3ca30b50a51e7ba93b00422e486cbb124f1c56a535e20eff7b2d6ab72b3b2e37", size = 13351827, upload-time = "2025-09-19T00:09:58.311Z" }, + { url = "https://files.pythonhosted.org/packages/c8/7d/2697b930179e7277529eaaec1513f8de622818696857f689e4a5432e5e27/mypy-1.18.2-cp311-cp311-win_amd64.whl", hash = "sha256:664dc726e67fa54e14536f6e1224bcfce1d9e5ac02426d2326e2bb4e081d1ce8", size = 9757983, upload-time = "2025-09-19T00:10:09.071Z" }, + { url = "https://files.pythonhosted.org/packages/07/06/dfdd2bc60c66611dd8335f463818514733bc763e4760dee289dcc33df709/mypy-1.18.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:33eca32dd124b29400c31d7cf784e795b050ace0e1f91b8dc035672725617e34", size = 12908273, upload-time = "2025-09-19T00:10:58.321Z" }, + { url = "https://files.pythonhosted.org/packages/81/14/6a9de6d13a122d5608e1a04130724caf9170333ac5a924e10f670687d3eb/mypy-1.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a3c47adf30d65e89b2dcd2fa32f3aeb5e94ca970d2c15fcb25e297871c8e4764", size = 11920910, upload-time = "2025-09-19T00:10:20.043Z" }, + { url = "https://files.pythonhosted.org/packages/5f/a9/b29de53e42f18e8cc547e38daa9dfa132ffdc64f7250e353f5c8cdd44bee/mypy-1.18.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d6c838e831a062f5f29d11c9057c6009f60cb294fea33a98422688181fe2893", size = 12465585, upload-time = "2025-09-19T00:10:33.005Z" }, + { url = 
"https://files.pythonhosted.org/packages/77/ae/6c3d2c7c61ff21f2bee938c917616c92ebf852f015fb55917fd6e2811db2/mypy-1.18.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01199871b6110a2ce984bde85acd481232d17413868c9807e95c1b0739a58914", size = 13348562, upload-time = "2025-09-19T00:10:11.51Z" }, + { url = "https://files.pythonhosted.org/packages/4d/31/aec68ab3b4aebdf8f36d191b0685d99faa899ab990753ca0fee60fb99511/mypy-1.18.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a2afc0fa0b0e91b4599ddfe0f91e2c26c2b5a5ab263737e998d6817874c5f7c8", size = 13533296, upload-time = "2025-09-19T00:10:06.568Z" }, + { url = "https://files.pythonhosted.org/packages/9f/83/abcb3ad9478fca3ebeb6a5358bb0b22c95ea42b43b7789c7fb1297ca44f4/mypy-1.18.2-cp312-cp312-win_amd64.whl", hash = "sha256:d8068d0afe682c7c4897c0f7ce84ea77f6de953262b12d07038f4d296d547074", size = 9828828, upload-time = "2025-09-19T00:10:28.203Z" }, + { url = "https://files.pythonhosted.org/packages/5f/04/7f462e6fbba87a72bc8097b93f6842499c428a6ff0c81dd46948d175afe8/mypy-1.18.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:07b8b0f580ca6d289e69209ec9d3911b4a26e5abfde32228a288eb79df129fcc", size = 12898728, upload-time = "2025-09-19T00:10:01.33Z" }, + { url = "https://files.pythonhosted.org/packages/99/5b/61ed4efb64f1871b41fd0b82d29a64640f3516078f6c7905b68ab1ad8b13/mypy-1.18.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed4482847168439651d3feee5833ccedbf6657e964572706a2adb1f7fa4dfe2e", size = 11910758, upload-time = "2025-09-19T00:10:42.607Z" }, + { url = "https://files.pythonhosted.org/packages/3c/46/d297d4b683cc89a6e4108c4250a6a6b717f5fa96e1a30a7944a6da44da35/mypy-1.18.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ad2afadd1e9fea5cf99a45a822346971ede8685cc581ed9cd4d42eaf940986", size = 12475342, upload-time = "2025-09-19T00:11:00.371Z" }, + { url = "https://files.pythonhosted.org/packages/83/45/4798f4d00df13eae3bfdf726c9244bcb495ab5bd588c0eed93a2f2dd67f3/mypy-1.18.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a431a6f1ef14cf8c144c6b14793a23ec4eae3db28277c358136e79d7d062f62d", size = 13338709, upload-time = "2025-09-19T00:11:03.358Z" }, + { url = "https://files.pythonhosted.org/packages/d7/09/479f7358d9625172521a87a9271ddd2441e1dab16a09708f056e97007207/mypy-1.18.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7ab28cc197f1dd77a67e1c6f35cd1f8e8b73ed2217e4fc005f9e6a504e46e7ba", size = 13529806, upload-time = "2025-09-19T00:10:26.073Z" }, + { url = "https://files.pythonhosted.org/packages/71/cf/ac0f2c7e9d0ea3c75cd99dff7aec1c9df4a1376537cb90e4c882267ee7e9/mypy-1.18.2-cp313-cp313-win_amd64.whl", hash = "sha256:0e2785a84b34a72ba55fb5daf079a1003a34c05b22238da94fcae2bbe46f3544", size = 9833262, upload-time = "2025-09-19T00:10:40.035Z" }, + { url = "https://files.pythonhosted.org/packages/5a/0c/7d5300883da16f0063ae53996358758b2a2df2a09c72a5061fa79a1f5006/mypy-1.18.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:62f0e1e988ad41c2a110edde6c398383a889d95b36b3e60bcf155f5164c4fdce", size = 12893775, upload-time = "2025-09-19T00:10:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/50/df/2cffbf25737bdb236f60c973edf62e3e7b4ee1c25b6878629e88e2cde967/mypy-1.18.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8795a039bab805ff0c1dfdb8cd3344642c2b99b8e439d057aba30850b8d3423d", size = 11936852, upload-time = "2025-09-19T00:10:51.631Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/50/34059de13dd269227fb4a03be1faee6e2a4b04a2051c82ac0a0b5a773c9a/mypy-1.18.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ca1e64b24a700ab5ce10133f7ccd956a04715463d30498e64ea8715236f9c9c", size = 12480242, upload-time = "2025-09-19T00:11:07.955Z" }, + { url = "https://files.pythonhosted.org/packages/5b/11/040983fad5132d85914c874a2836252bbc57832065548885b5bb5b0d4359/mypy-1.18.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d924eef3795cc89fecf6bedc6ed32b33ac13e8321344f6ddbf8ee89f706c05cb", size = 13326683, upload-time = "2025-09-19T00:09:55.572Z" }, + { url = "https://files.pythonhosted.org/packages/e9/ba/89b2901dd77414dd7a8c8729985832a5735053be15b744c18e4586e506ef/mypy-1.18.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:20c02215a080e3a2be3aa50506c67242df1c151eaba0dcbc1e4e557922a26075", size = 13514749, upload-time = "2025-09-19T00:10:44.827Z" }, + { url = "https://files.pythonhosted.org/packages/25/bc/cc98767cffd6b2928ba680f3e5bc969c4152bf7c2d83f92f5a504b92b0eb/mypy-1.18.2-cp314-cp314-win_amd64.whl", hash = "sha256:749b5f83198f1ca64345603118a6f01a4e99ad4bf9d103ddc5a3200cc4614adf", size = 9982959, upload-time = "2025-09-19T00:10:37.344Z" }, + { url = "https://files.pythonhosted.org/packages/87/e3/be76d87158ebafa0309946c4a73831974d4d6ab4f4ef40c3b53a385a66fd/mypy-1.18.2-py3-none-any.whl", hash = "sha256:22a1748707dd62b58d2ae53562ffc4d7f8bcc727e8ac7cbc69c053ddc874d47e", size = 2352367, upload-time = "2025-09-19T00:10:15.489Z" }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "networkx" version = "3.4.2" @@ -2168,6 +2331,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/e7/40fb618334dcdf7c5a316c0e7343c5cd82d3d866edc100d98e29bc945ecd/partd-1.4.2-py3-none-any.whl", hash = "sha256:978e4ac767ec4ba5b86c6eaa52e5a2a3bc748a2ca839e8cc798f1cc6ce6efb0f", size = 18905, upload-time = "2024-05-06T19:51:39.271Z" }, ] +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, +] + [[package]] name = "pexpect" version = "4.9.0" @@ -2270,11 +2442,27 @@ wheels = [ name = "platformdirs" version = "4.3.8" source = { registry = 
"https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362, upload-time = "2025-05-07T22:47:42.121Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, ] +[[package]] +name = "platformdirs" +version = "4.5.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/61/33/9611380c2bdb1225fdef633e2a9610622310fed35ab11dac9620972ee088/platformdirs-4.5.0.tar.gz", hash = "sha256:70ddccdd7c99fc5942e9fc25636a8b34d04c24b335100223152c2803e4063312", size = 21632, upload-time = "2025-10-08T17:44:48.791Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/cb/ac7874b3e5d58441674fb70742e6c374b28b0c7cb988d37d991cde47166c/platformdirs-4.5.0-py3-none-any.whl", hash = "sha256:e578a81bb873cbb89a41fcc904c7ef523cc18284b7e3b3ccf06aca1403b7ebd3", size = 18651, upload-time = "2025-10-08T17:44:47.223Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -2444,7 +2632,8 @@ dependencies = [ { name = "dill" }, { name = "isort" }, { name = "mccabe" }, - { name = "platformdirs" }, + { name = "platformdirs", version = "4.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "platformdirs", version = "4.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "tomlkit" }, ] @@ -2477,6 +2666,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload-time = "2025-03-25T05:01:24.908Z" }, ] +[[package]] +name = "pyproject-api" +version = "1.9.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "packaging", marker = "python_full_version < '3.11'" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/fd/437901c891f58a7b9096511750247535e891d2d5a5a6eefbc9386a2b41d5/pyproject_api-1.9.1.tar.gz", hash = "sha256:43c9918f49daab37e302038fc1aed54a8c7a91a9fa935d00b9a485f37e0f5335", size = 22710, upload-time = "2025-05-12T14:41:58.025Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/e6/c293c06695d4a3ab0260ef124a74ebadba5f4c511ce3a4259e976902c00b/pyproject_api-1.9.1-py3-none-any.whl", hash = "sha256:7d6238d92f8962773dd75b5f0c4a6a27cce092a14b623b811dba656f3b628948", size = 13158, upload-time = "2025-05-12T14:41:56.217Z" }, +] + +[[package]] +name = "pyproject-api" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "packaging", marker = 
"python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/45/7b/c0e1333b61d41c69e59e5366e727b18c4992688caf0de1be10b3e5265f6b/pyproject_api-1.10.0.tar.gz", hash = "sha256:40c6f2d82eebdc4afee61c773ed208c04c19db4c4a60d97f8d7be3ebc0bbb330", size = 22785, upload-time = "2025-10-09T19:12:27.21Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/cc/cecf97be298bee2b2a37dd360618c819a2a7fd95251d8e480c1f0eb88f3b/pyproject_api-1.10.0-py3-none-any.whl", hash = "sha256:8757c41a79c0f4ab71b99abed52b97ecf66bd20b04fa59da43b5840bac105a09", size = 13218, upload-time = "2025-10-09T19:12:24.428Z" }, +] + [[package]] name = "pyreadline3" version = "3.5.4" @@ -3434,7 +3655,8 @@ version = "2.15.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "absl-py" }, - { name = "google-auth" }, + { name = "google-auth", version = "2.40.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "google-auth", version = "2.42.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "google-auth-oauthlib" }, { name = "grpcio" }, { name = "markdown" }, @@ -3555,7 +3777,8 @@ version = "3.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py", extra = ["linkify", "plugins"] }, - { name = "platformdirs" }, + { name = "platformdirs", version = "4.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "platformdirs", version = "4.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "rich" }, { name = "typing-extensions" }, ] @@ -3690,21 +3913,85 @@ wheels = [ [[package]] name = "tox" -version = "3.24.4" +version = "4.20.0" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "filelock" }, - { name = "packaging" }, - { name = "pluggy" }, - { name = "py" }, - { name = "six" }, - { name = "toml" }, - { name = "virtualenv" }, + { name = "cachetools", version = "5.5.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "chardet", marker = "python_full_version < '3.11'" }, + { name = "colorama", marker = "python_full_version < '3.11'" }, + { name = "filelock", version = "3.18.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "packaging", marker = "python_full_version < '3.11'" }, + { name = "platformdirs", version = "4.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "pluggy", marker = "python_full_version < '3.11'" }, + { name = "pyproject-api", version = "1.9.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "virtualenv", version = "20.31.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/4a/55f9dba99aad874ae54a7fb2310c940e978fd0155eb3576ddebec000fca7/tox-4.20.0.tar.gz", hash = "sha256:5b78a49b6eaaeab3ae4186415e7c97d524f762ae967c63562687c3e5f0ec23d5", size = 181364, upload-time = "2024-09-19T03:46:15.252Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/cf/ee/6f9bf37f197578f98fb450f1aeebf4570f85b24b00d846bbde6e11489bd1/tox-4.20.0-py3-none-any.whl", hash = "sha256:21a8005e3d3fe5658a8e36b8ca3ed13a4230429063c5cc2a2fdac6ee5aa0de34", size = 157087, upload-time = "2024-09-19T03:46:12.754Z" }, +] + +[[package]] +name = "tox" +version = "4.32.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", ] -sdist = { url = "https://files.pythonhosted.org/packages/d2/78/ad720ade1c6c5b24e407856fb8fc578896ed8e2a603832bb85be3825b551/tox-3.24.4.tar.gz", hash = "sha256:c30b57fa2477f1fb7c36aa1d83292d5c2336cd0018119e1b1c17340e2c2708ca", size = 316762, upload-time = "2021-09-16T09:45:00.904Z" } +dependencies = [ + { name = "cachetools", version = "6.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "chardet", marker = "python_full_version >= '3.11'" }, + { name = "colorama", marker = "python_full_version >= '3.11'" }, + { name = "filelock", version = "3.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "packaging", marker = "python_full_version >= '3.11'" }, + { name = "platformdirs", version = "4.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pluggy", marker = "python_full_version >= '3.11'" }, + { name = "pyproject-api", version = "1.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "virtualenv", version = "20.35.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/59/bf/0e4dbd42724cbae25959f0e34c95d0c730df03ab03f54d52accd9abfc614/tox-4.32.0.tar.gz", hash = "sha256:1ad476b5f4d3679455b89a992849ffc3367560bbc7e9495ee8a3963542e7c8ff", size = 203330, upload-time = "2025-10-24T18:03:38.132Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/78/b0/ce98616ec9c3f270495a2493cde4d81b1f499057222ae77a8103aea59777/tox-3.24.4-py2.py3-none-any.whl", hash = "sha256:5e274227a53dc9ef856767c21867377ba395992549f02ce55eb549f9fb9a8d10", size = 85645, upload-time = "2021-09-16T09:44:58.664Z" }, + { url = "https://files.pythonhosted.org/packages/fc/cc/e09c0d663a004945f82beecd4f147053567910479314e8d01ba71e5d5dea/tox-4.32.0-py3-none-any.whl", hash = "sha256:451e81dc02ba8d1ed20efd52ee409641ae4b5d5830e008af10fe8823ef1bd551", size = 175905, upload-time = "2025-10-24T18:03:36.337Z" }, +] + +[[package]] +name = "tox-uv" +version = "1.13.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] +dependencies = [ + { name = "packaging", marker = "python_full_version < '3.11'" }, + { name = "tox", version = "4.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "uv", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/93/1f06c3cbfd4c1aa23859d49a76c7e65b51e60715bc22b2dd16cbff9c1e71/tox_uv-1.13.1.tar.gz", hash = "sha256:a8504b8db4bf6c81cba7cd3518851a3f1e0f6991d22272a4cc08ebe1b7f38cca", size = 15645, upload-time = "2024-10-11T16:14:57.301Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/8e/94afb25547f5e4987801e8f6aa11e357190f72f31eb363267a3cb2fa6a88/tox_uv-1.13.1-py3-none-any.whl", hash = 
"sha256:b163dd28ca37a9f4c6d8cbac11153be27c2e929b58bcae62e323ffa8f71c327d", size = 13383, upload-time = "2024-10-11T16:14:55.885Z" }, +] + +[[package]] +name = "tox-uv" +version = "1.29.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "packaging", marker = "python_full_version >= '3.11'" }, + { name = "tox", version = "4.32.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "uv", marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4f/90/06752775b8cfadba8856190f5beae9f552547e0f287e0246677972107375/tox_uv-1.29.0.tar.gz", hash = "sha256:30fa9e6ad507df49d3c6a2f88894256bcf90f18e240a00764da6ecab1db24895", size = 23427, upload-time = "2025-10-09T20:40:27.384Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/17/221d62937c4130b044bb437caac4181e7e13d5536bbede65264db1f0ac9f/tox_uv-1.29.0-py3-none-any.whl", hash = "sha256:b1d251286edeeb4bc4af1e24c8acfdd9404700143c2199ccdbb4ea195f7de6cc", size = 17254, upload-time = "2025-10-09T20:40:25.885Z" }, ] [[package]] @@ -3728,6 +4015,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, ] +[[package]] +name = "types-python-dateutil" +version = "2.9.0.20251008" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/83/24ed25dd0c6277a1a170c180ad9eef5879ecc9a4745b58d7905a4588c80d/types_python_dateutil-2.9.0.20251008.tar.gz", hash = "sha256:c3826289c170c93ebd8360c3485311187df740166dbab9dd3b792e69f2bc1f9c", size = 16128, upload-time = "2025-10-08T02:51:34.93Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/af/5d24b8d49ef358468ecfdff5c556adf37f4fd28e336b96f923661a808329/types_python_dateutil-2.9.0.20251008-py3-none-any.whl", hash = "sha256:b9a5232c8921cf7661b29c163ccc56055c418ab2c6eabe8f917cbcc73a4c4157", size = 17934, upload-time = "2025-10-08T02:51:33.55Z" }, +] + +[[package]] +name = "types-pyyaml" +version = "6.0.12.20250915" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/69/3c51b36d04da19b92f9e815be12753125bd8bc247ba0470a982e6979e71c/types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3", size = 17522, upload-time = "2025-09-15T03:01:00.728Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/e0/1eed384f02555dde685fff1a1ac805c1c7dcb6dd019c916fe659b1c1f9ec/types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6", size = 20338, upload-time = "2025-09-15T03:00:59.218Z" }, +] + +[[package]] +name = "types-requests" +version = "2.31.0.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "types-urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f9/b8/c1e8d39996b4929b918aba10dba5de07a8b3f4c8487bb61bb79882544e69/types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0", size = 15535, upload-time = "2023-09-27T06:19:38.443Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/5c/a1/6f8dc74d9069e790d604ddae70cb46dcbac668f1bb08136e7b0f2f5cd3bf/types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9", size = 14516, upload-time = "2023-09-27T06:19:36.373Z" }, +] + +[[package]] +name = "types-toml" +version = "0.10.8.20240310" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/86/47/3e4c75042792bff8e90d7991aa5c51812cc668828cc6cce711e97f63a607/types-toml-0.10.8.20240310.tar.gz", hash = "sha256:3d41501302972436a6b8b239c850b26689657e25281b48ff0ec06345b8830331", size = 4392, upload-time = "2024-03-10T02:18:37.518Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/a2/d32ab58c0b216912638b140ab2170ee4b8644067c293b170e19fba340ccc/types_toml-0.10.8.20240310-py3-none-any.whl", hash = "sha256:627b47775d25fa29977d9c70dc0cbab3f314f32c8d8d0c012f2ef5de7aaec05d", size = 4777, upload-time = "2024-03-10T02:18:36.568Z" }, +] + +[[package]] +name = "types-urllib3" +version = "1.26.25.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/73/de/b9d7a68ad39092368fb21dd6194b362b98a1daeea5dcfef5e1adb5031c7e/types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f", size = 11239, upload-time = "2023-07-20T15:19:31.307Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/7b/3fc711b2efea5e85a7a0bbfe269ea944aa767bbba5ec52f9ee45d362ccf3/types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e", size = 15377, upload-time = "2023-07-20T15:19:30.379Z" }, +] + [[package]] name = "typing-extensions" version = "4.7.1" @@ -3764,6 +4099,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/cf/8435d5a7159e2a9c83a95896ed596f68cf798005fe107cc655b5c5c14704/urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e", size = 144225, upload-time = "2024-08-29T15:43:08.921Z" }, ] +[[package]] +name = "uv" +version = "0.9.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/f6/9914f57d152cfcb85f3a26f8fbac3c88e4eb9cbe88639076241e16819334/uv-0.9.7.tar.gz", hash = "sha256:555ee72146b8782c73d755e4a21c9885c6bfc81db0ffca2220d52dddae007eb7", size = 3705596, upload-time = "2025-10-30T22:17:18.652Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/38/cee64a9dcefd46f83a922c4e31d9cd9d91ce0d27a594192f7df677151eb4/uv-0.9.7-py3-none-linux_armv6l.whl", hash = "sha256:134e0daac56f9e399ccdfc9e4635bc0a13c234cad9224994c67bae462e07399a", size = 20614967, upload-time = "2025-10-30T22:16:31.274Z" }, + { url = "https://files.pythonhosted.org/packages/6f/b7/1b1ff8dfde05e9d27abf29ebf22da48428fe1e16f0b4d65a839bd2211303/uv-0.9.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:1aaf79b4234400e9e2fbf5b50b091726ccbb0b6d4d032edd3dfd4c9673d89dca", size = 19692886, upload-time = "2025-10-30T22:16:35.893Z" }, + { url = "https://files.pythonhosted.org/packages/f5/7d/b618174d8a8216af350398ace03805b2b2df6267b1745abf45556c2fda58/uv-0.9.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0fdbfad5b367e7a3968264af6da5bbfffd4944a90319042f166e8df1a2d9de09", size = 18345022, upload-time = "2025-10-30T22:16:38.45Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/4c/03fafb7d28289d54ac7a34507f1e97e527971f8b0ee2c5e957045966a1a6/uv-0.9.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:635e82c2d0d8b001618af82e4f2724350f15814f6462a71b3ebd44adec21f03c", size = 20170427, upload-time = "2025-10-30T22:16:41.099Z" }, + { url = "https://files.pythonhosted.org/packages/35/0e/f1316da150453755bb88cf4232e8934de71a0091eb274a8b69d948535453/uv-0.9.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:56a440ccde7624a7bc070e1c2492b358c67aea9b8f17bc243ea27c5871c8d02c", size = 20234277, upload-time = "2025-10-30T22:16:43.521Z" }, + { url = "https://files.pythonhosted.org/packages/37/b8/cb62cd78151b235c5da9290f0e3fb032b36706f2922208a691678aa0f2df/uv-0.9.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b5f1fb8203a77853db176000e8f30d5815ab175dc46199db059f97a72fc51110", size = 21180078, upload-time = "2025-10-30T22:16:45.857Z" }, + { url = "https://files.pythonhosted.org/packages/be/e5/6107249d23f06fa1739496e89699e76169037b4643144b28b324efc3075d/uv-0.9.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:bb8bfcc2897f7653522abc2cae80233af756ad857bfbbbbe176f79460cbba417", size = 22743896, upload-time = "2025-10-30T22:16:48.487Z" }, + { url = "https://files.pythonhosted.org/packages/df/94/69d8e0bb29c140305e7677bc8c98c765468a55cb10966e77bb8c69bf815d/uv-0.9.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89697fa0d7384ba047daf75df844ee7800235105e41d08e0c876861a2b4aa90e", size = 22361126, upload-time = "2025-10-30T22:16:51.366Z" }, + { url = "https://files.pythonhosted.org/packages/c0/0d/d186456cd0d7972ed026e5977b8a12e1f94c923fc3d6e86c7826c6f0d1fe/uv-0.9.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c9810ee8173dce129c49b338d5e97f3d7c7e9435f73e0b9b26c2f37743d3bb9e", size = 21477489, upload-time = "2025-10-30T22:16:53.757Z" }, + { url = "https://files.pythonhosted.org/packages/c7/59/61d8e9f1734069049abe9e593961de602397c7194712346906c075fec65f/uv-0.9.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cf6bc2482d1293cc630f66b862b494c09acda9b7faff7307ef52667a2b3ad49", size = 21382006, upload-time = "2025-10-30T22:16:56.117Z" }, + { url = "https://files.pythonhosted.org/packages/74/ac/090dbde63abb56001190392d29ca2aa654eebc146a693b5dda68da0df2fb/uv-0.9.7-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:7019f4416925f4091b9d28c1cf3e8444cf910c4ede76bdf1f6b9a56ca5f97985", size = 20255103, upload-time = "2025-10-30T22:16:58.434Z" }, + { url = "https://files.pythonhosted.org/packages/56/e7/ca2d99a4ce86366731547a84b5a2c946528b8d6d28c74ac659c925955a0c/uv-0.9.7-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:edd768f6730bba06aa10fdbd80ee064569f7236806f636bf65b68136a430aad0", size = 21311768, upload-time = "2025-10-30T22:17:01.259Z" }, + { url = "https://files.pythonhosted.org/packages/d8/1a/c5d9e57f52aa30bfee703e6b9e5b5072102cfc706f3444377bb0de79eac7/uv-0.9.7-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:d6e5fe28ca05a4b576c0e8da5f69251dc187a67054829cfc4afb2bfa1767114b", size = 20239129, upload-time = "2025-10-30T22:17:03.815Z" }, + { url = "https://files.pythonhosted.org/packages/aa/ab/16110ca6b1c4aaad79b4f2c6bc102c416a906e5d29947d0dc774f6ef4365/uv-0.9.7-py3-none-musllinux_1_1_i686.whl", hash = "sha256:34fe0af83fcafb9e2b786f4bd633a06c878d548a7c479594ffb5607db8778471", size = 20647326, upload-time = "2025-10-30T22:17:06.33Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/a9/2a8129c796831279cc0c53ffdd19dd6133d514805e52b1ef8a2aa0ff8912/uv-0.9.7-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:777bb1de174319245a35e4f805d3b4484d006ebedae71d3546f95e7c28a5f436", size = 21604958, upload-time = "2025-10-30T22:17:09.046Z" }, + { url = "https://files.pythonhosted.org/packages/73/97/616650cb4dd5fbaabf8237469e1bc84710ae878095d359999982e1bc8ecf/uv-0.9.7-py3-none-win32.whl", hash = "sha256:bcf878528bd079fe8ae15928b5dfa232fac8b0e1854a2102da6ae1a833c31276", size = 19418913, upload-time = "2025-10-30T22:17:11.384Z" }, + { url = "https://files.pythonhosted.org/packages/de/7f/e3cdaffac70852f5ff933b04c7b8a06c0f91f41e563f04b689caa65b71bd/uv-0.9.7-py3-none-win_amd64.whl", hash = "sha256:62b315f62669899076a1953fba6baf50bd2b57f66f656280491331dcedd7e6c6", size = 21443513, upload-time = "2025-10-30T22:17:13.785Z" }, + { url = "https://files.pythonhosted.org/packages/89/79/8278452acae2fe96829485d32e1a2363829c9e42674704562ffcfc06b140/uv-0.9.7-py3-none-win_arm64.whl", hash = "sha256:d13da6521d4e841b1e0a9fda82e793dcf8458a323a9e8955f50903479d0bfa97", size = 19946729, upload-time = "2025-10-30T22:17:16.669Z" }, +] + [[package]] name = "vine" version = "5.1.0" @@ -3777,16 +4138,37 @@ wheels = [ name = "virtualenv" version = "20.31.2" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] dependencies = [ - { name = "distlib" }, - { name = "filelock" }, - { name = "platformdirs" }, + { name = "distlib", marker = "python_full_version < '3.11'" }, + { name = "filelock", version = "3.18.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "platformdirs", version = "4.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316, upload-time = "2025-05-08T17:58:23.811Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982, upload-time = "2025-05-08T17:58:21.15Z" }, ] +[[package]] +name = "virtualenv" +version = "20.35.4" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "distlib", marker = "python_full_version >= '3.11'" }, + { name = "filelock", version = "3.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "platformdirs", version = "4.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/20/28/e6f1a6f655d620846bd9df527390ecc26b3805a0c5989048c210e22c5ca9/virtualenv-20.35.4.tar.gz", hash = "sha256:643d3914d73d3eeb0c552cbb12d7e82adf0e504dbf86a3182f8771a153a1971c", size = 6028799, upload-time = "2025-10-29T06:57:40.511Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/0c/c05523fa3181fdf0c9c52a6ba91a23fbf3246cc095f26f6516f9c60e6771/virtualenv-20.35.4-py3-none-any.whl", hash = 
"sha256:c21c9cede36c9753eeade68ba7d523529f228a403463376cf821eaae2b650f1b", size = 6005095, upload-time = "2025-10-29T06:57:37.598Z" }, +] + [[package]] name = "wcwidth" version = "0.2.13"