diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml
index d37d205da4..3a0e3f953a 100644
--- a/.github/workflows/build_docker.yml
+++ b/.github/workflows/build_docker.yml
@@ -55,9 +55,21 @@ jobs:
     name: End-to-end test (Docker)
     runs-on: ubuntu-latest
     steps:
+      - name: Remove unnecessary files from the base image
+        run: |
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+
       - name: Checkout repository
        uses: actions/checkout@v4
 
+      - name: Extract project version
+        id: version
+        run: |
+          VERSION=$(python -c "exec(open('metadata.py').read()); print(__version__)")
+          echo "version=$VERSION" >> $GITHUB_OUTPUT
+          echo "Using version: $VERSION"
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
         id: setup-buildx
@@ -67,6 +79,7 @@
         with:
           context: .
           file: ./docker/database/Dockerfile
+          build-args: VERSION=${{ steps.version.outputs.version }}
           platforms: linux/amd64
           tags: ghcr.io/${{ github.repository_owner }}/augur_database:test
           cache-from: type=gha,scope=container-database
@@ -78,6 +91,7 @@
         with:
           context: .
           file: ./docker/keyman/Dockerfile
+          build-args: VERSION=${{ steps.version.outputs.version }}
           platforms: linux/amd64
           tags: ghcr.io/${{ github.repository_owner }}/augur_keyman:test
           cache-from: type=gha,scope=container-keyman
@@ -89,6 +103,7 @@
         with:
           context: .
           file: ./docker/rabbitmq/Dockerfile
+          build-args: VERSION=${{ steps.version.outputs.version }}
           platforms: linux/amd64
           tags: ghcr.io/${{ github.repository_owner }}/augur_rabbitmq:test
           cache-from: type=gha,scope=container-rabbitmq
@@ -100,6 +115,7 @@
         with:
           context: .
           file: ./docker/backend/Dockerfile
+          build-args: VERSION=${{ steps.version.outputs.version }}
           platforms: linux/amd64
           tags: ghcr.io/${{ github.repository_owner }}/augur_backend:test
           cache-from: type=gha,scope=container-backend
diff --git a/CITATION.cff b/CITATION.cff
index e26f3d8a86..01514fb22f 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -10,5 +10,5 @@ authors:
   given-names: Matt
 title: "Open Source Community Health: Analytical Metrics and Their Corresponding Narratives"
 doi: 10.1109/SoHeal52568.2021.00010
-date-released: 2021
+date-released: 2021-01-01
 url: https://www.seangoggins.net/wp-content/plugins/zotpress/lib/request/request.dl.php?api_user_id=655145&dlkey=HNG22ZSU&content_type=application/pdf
diff --git a/README.md b/README.md
index 9c7acddc65..bac449c3d8 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Augur NEW Release v0.90.0
+# Augur NEW Release v0.90.3
 
 Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data - less data carpentry for everyone else! The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot), a public instance of 8Knot is available [here](https://metrix.chaoss.io) - this is tied to a public instance of [Augur](https://ai.chaoss.io).
 
@@ -11,7 +11,7 @@ We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy o
 ## NEW RELEASE ALERT!
 
 **If you want to jump right in, the updated docker, docker-compose and bare metal installation instructions are available [here](docs/new-install.md)**.
 
-Augur is now releasing a dramatically improved new version. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.90.0).
+Augur is now releasing a dramatically improved new version. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.90.3).
 
 - The `release` branch is a stable version of our new architecture, which features:
diff --git a/augur/api/metrics/repo_meta.py b/augur/api/metrics/repo_meta.py
index ffc8fc84ef..c39922e17b 100644
--- a/augur/api/metrics/repo_meta.py
+++ b/augur/api/metrics/repo_meta.py
@@ -1240,3 +1240,59 @@ def aggregate_summary(repo_group_id, repo_id=None, begin_date=None, end_date=Non
     results = pd.read_sql(summarySQL, conn, params={'repo_id': repo_id, 'begin_date': begin_date, 'end_date': end_date})
 
     return results
+
+@register_metric()
+def clones(repo_group_id, repo_id=None, begin_date=None, end_date=None):
+    """
+    Returns the number of repository clones (total and unique) for a given repo or repo group.
+    :param repo_group_id: The repository's repo_group_id
+    :param repo_id: The repository's repo_id, defaults to None
+    :param begin_date: Start date for filtering clone data (optional)
+    :param end_date: End date for filtering clone data (optional)
+    :return: DataFrame of clone counts (total and unique) per day
+    """
+    if not begin_date:
+        begin_date = '1970-1-1 00:00:00'
+    if not end_date:
+        end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+    if repo_id:
+        clones_sql = s.sql.text("""
+            SELECT
+                repo_id,
+                clone_data_timestamp AS date,
+                count_clones AS total_clones,
+                unique_clones
+            FROM augur_data.repo_clones_data
+            WHERE repo_id = :repo_id
+                AND clone_data_timestamp BETWEEN :begin_date AND :end_date
+            ORDER BY clone_data_timestamp
+        """)
+        with current_app.engine.connect() as conn:
+            results = pd.read_sql(clones_sql, conn, params={
+                'repo_id': repo_id,
+                'begin_date': begin_date,
+                'end_date': end_date
+            })
+        return results
+    else:
+        clones_sql = s.sql.text("""
+            SELECT
+                repo_id,
+                clone_data_timestamp AS date,
+                count_clones AS total_clones,
+                unique_clones
+            FROM augur_data.repo_clones_data
+            WHERE repo_id IN (
+                SELECT repo_id FROM augur_data.repo WHERE repo_group_id = :repo_group_id
+            )
+                AND clone_data_timestamp BETWEEN :begin_date AND :end_date
+            ORDER BY repo_id, clone_data_timestamp
+        """)
+        with current_app.engine.connect() as conn:
+            results = pd.read_sql(clones_sql, conn, params={
+                'repo_group_id': repo_group_id,
+                'begin_date': begin_date,
+                'end_date': end_date
+            })
+        return results
diff --git a/augur/api/routes/__init__.py b/augur/api/routes/__init__.py
index 03c2e2fa71..8176dad94b 100644
--- a/augur/api/routes/__init__.py
+++ b/augur/api/routes/__init__.py
@@ -4,10 +4,8 @@
 from .batch import *
 from .collection_status import *
 from .config import *
-from .contributor_reports import *
 from .manager import *
 from .nonstandard_metrics import *
-from .pull_request_reports import *
 from .user import *
 from .dei import *
 from .util import *
diff --git a/augur/api/routes/contributor_reports.py b/augur/api/routes/contributor_reports.py
deleted file mode 100644
index 711f321b3e..0000000000
--- a/augur/api/routes/contributor_reports.py
+++ /dev/null
@@ -1,1272 +0,0 @@
-import psycopg2
-import psycopg2
-import sqlalchemy as salc
-import numpy as np
-import warnings
-import datetime
-import pandas as pd
-from math import pi
-from flask import request, send_file, Response, current_app
-
-# import visualization libraries
-from bokeh.io import export_png
-from bokeh.embed import json_item
-from bokeh.plotting import figure
-from bokeh.models import Label, LabelSet, ColumnDataSource, Legend
-from bokeh.palettes import Colorblind
-from bokeh.layouts import gridplot
-from bokeh.transform import cumsum
-
-from augur.api.routes import AUGUR_API_VERSION
-from ..server import app
-
-warnings.filterwarnings('ignore') - -def quarters(month, year): - if 1 <= month <= 3: - return '01' + '/' + year - elif 4 <= month <= 6: - return '04' + '/' + year - elif 5 <= month <= 9: - return '07' + '/' + year - elif 10 <= month <= 12: - return '10' + '/' + year - -def new_contributor_data_collection(repo_id, required_contributions): - - rank_list = [] - for num in range(1, required_contributions + 1): - rank_list.append(num) - rank_tuple = tuple(rank_list) - - contributor_query = salc.sql.text(f""" - - SELECT * FROM ( - SELECT ID AS - cntrb_id, - A.created_at AS created_at, - date_part('month', A.created_at::DATE) AS month, - date_part('year', A.created_at::DATE) AS year, - A.repo_id, - repo_name, - full_name, - login, - ACTION, - rank() OVER ( - PARTITION BY id - ORDER BY A.created_at ASC - ) - FROM - ( - ( - SELECT - canonical_id AS ID, - created_at AS created_at, - repo_id, - 'issue_opened' AS ACTION, - contributors.cntrb_full_name AS full_name, - contributors.cntrb_login AS login - FROM - augur_data.issues - LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = issues.reporter_id - LEFT OUTER JOIN ( - SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - cntrb_canonical AS canonical_email, - data_collection_date, - cntrb_id AS canonical_id - FROM augur_data.contributors - WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - WHERE - repo_id = {repo_id} - AND pull_request IS NULL - GROUP BY - canonical_id, - repo_id, - issues.created_at, - contributors.cntrb_full_name, - contributors.cntrb_login - ) UNION ALL - ( - SELECT - canonical_id AS ID, - TO_TIMESTAMP( cmt_author_date, 'YYYY-MM-DD' ) AS created_at, - repo_id, - 'commit' AS ACTION, - contributors.cntrb_full_name AS full_name, - contributors.cntrb_login AS login - FROM - augur_data.commits - LEFT OUTER JOIN augur_data.contributors ON cntrb_email = cmt_author_email - LEFT OUTER JOIN ( - SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - cntrb_canonical AS canonical_email, - data_collection_date, cntrb_id AS canonical_id - FROM augur_data.contributors - WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - WHERE - repo_id = {repo_id} - GROUP BY - repo_id, - canonical_email, - canonical_id, - commits.cmt_author_date, - contributors.cntrb_full_name, - contributors.cntrb_login - ) UNION ALL - ( - SELECT - message.cntrb_id AS ID, - created_at AS created_at, - commits.repo_id, - 'commit_comment' AS ACTION, - contributors.cntrb_full_name AS full_name, - contributors.cntrb_login AS login - - FROM - augur_data.commit_comment_ref, - augur_data.commits, - augur_data.message - LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id - LEFT OUTER JOIN ( - SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - cntrb_canonical AS canonical_email, - data_collection_date, cntrb_id AS canonical_id - FROM augur_data.contributors - WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - WHERE - commits.cmt_id = commit_comment_ref.cmt_id - AND commits.repo_id = {repo_id} - AND commit_comment_ref.msg_id = message.msg_id - - GROUP BY - ID, - commits.repo_id, - commit_comment_ref.created_at, - contributors.cntrb_full_name, - contributors.cntrb_login - ) UNION ALL - ( - SELECT - 
issue_events.cntrb_id AS ID, - issue_events.created_at AS created_at, - issues.repo_id, - 'issue_closed' AS ACTION, - contributors.cntrb_full_name AS full_name, - contributors.cntrb_login AS login - FROM - augur_data.issues, - augur_data.issue_events - LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = issue_events.cntrb_id - LEFT OUTER JOIN ( - SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - cntrb_canonical AS canonical_email, - data_collection_date, - cntrb_id AS canonical_id - FROM augur_data.contributors - WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - WHERE - issues.repo_id = {repo_id} - AND issues.issue_id = issue_events.issue_id - AND issues.pull_request IS NULL - AND issue_events.cntrb_id IS NOT NULL - AND ACTION = 'closed' - GROUP BY - issue_events.cntrb_id, - issues.repo_id, - issue_events.created_at, - contributors.cntrb_full_name, - contributors.cntrb_login - ) UNION ALL - ( - SELECT - pr_augur_contributor_id AS ID, - pr_created_at AS created_at, - pull_requests.repo_id, - 'open_pull_request' AS ACTION, - contributors.cntrb_full_name AS full_name, - contributors.cntrb_login AS login - FROM - augur_data.pull_requests - LEFT OUTER JOIN augur_data.contributors ON pull_requests.pr_augur_contributor_id = contributors.cntrb_id - LEFT OUTER JOIN ( - SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - cntrb_canonical AS canonical_email, - data_collection_date, - cntrb_id AS canonical_id - FROM augur_data.contributors - WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - WHERE - pull_requests.repo_id = {repo_id} - GROUP BY - pull_requests.pr_augur_contributor_id, - pull_requests.repo_id, - pull_requests.pr_created_at, - contributors.cntrb_full_name, - contributors.cntrb_login - ) UNION ALL - ( - SELECT - message.cntrb_id AS ID, - msg_timestamp AS created_at, - pull_requests.repo_id as repo_id, - 'pull_request_comment' AS ACTION, - contributors.cntrb_full_name AS full_name, - contributors.cntrb_login AS login - FROM - augur_data.pull_requests, - augur_data.pull_request_message_ref, - augur_data.message - LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id - LEFT OUTER JOIN ( - SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - cntrb_canonical AS canonical_email, - data_collection_date, - cntrb_id AS canonical_id - FROM augur_data.contributors - WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - WHERE - pull_requests.repo_id = {repo_id} - AND pull_request_message_ref.pull_request_id = pull_requests.pull_request_id - AND pull_request_message_ref.msg_id = message.msg_id - GROUP BY - message.cntrb_id, - pull_requests.repo_id, - message.msg_timestamp, - contributors.cntrb_full_name, - contributors.cntrb_login - ) UNION ALL - ( - SELECT - issues.reporter_id AS ID, - msg_timestamp AS created_at, - issues.repo_id as repo_id, - 'issue_comment' AS ACTION, - contributors.cntrb_full_name AS full_name, - contributors.cntrb_login AS login - FROM - issues, - issue_message_ref, - message - LEFT OUTER JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id - LEFT OUTER JOIN ( - SELECT DISTINCT ON ( cntrb_canonical ) cntrb_full_name, - cntrb_canonical AS canonical_email, - data_collection_date, - 
cntrb_id AS canonical_id - FROM augur_data.contributors - WHERE cntrb_canonical = cntrb_email ORDER BY cntrb_canonical - ) canonical_full_names ON canonical_full_names.canonical_email =contributors.cntrb_canonical - WHERE - issues.repo_id = {repo_id} - AND issue_message_ref.msg_id = message.msg_id - AND issues.issue_id = issue_message_ref.issue_id - AND issues.pull_request_id = NULL - GROUP BY - issues.reporter_id, - issues.repo_id, - message.msg_timestamp, - contributors.cntrb_full_name, - contributors.cntrb_login - ) - ) A, - repo - WHERE - ID IS NOT NULL - AND A.repo_id = repo.repo_id - GROUP BY - A.ID, - A.repo_id, - A.ACTION, - A.created_at, - repo.repo_name, - A.full_name, - A.login - ORDER BY - cntrb_id - ) b - WHERE RANK IN {rank_tuple} - - """) - - with current_app.engine.connect() as conn: - df = pd.read_sql(contributor_query, conn) - - df = df.loc[~df['full_name'].str.contains('bot', na=False)] - df = df.loc[~df['login'].str.contains('bot', na=False)] - - df = df.loc[~df['cntrb_id'].isin(df[df.duplicated(['cntrb_id', 'created_at', 'repo_id', 'rank'])]['cntrb_id'])] - - # add yearmonths to contributor - df[['month', 'year']] = df[['month', 'year']].astype(int).astype(str) - df['yearmonth'] = df['month'] + '/' + df['year'] - df['yearmonth'] = pd.to_datetime(df['yearmonth']) - - # add column with every value being one, so when the contributor df is concatenated - # with the months df, the filler months won't be counted in the sums - df['new_contributors'] = 1 - - # add quarters to contributor dataframe - df['month'] = df['month'].astype(int) - df['quarter'] = df.apply(lambda x: quarters(x['month'], x['year']), axis=1, result_type='reduce') - df['quarter'] = pd.to_datetime(df['quarter']) - - return df - -def months_data_collection(start_date, end_date): - - # months_query makes a df of years and months, this is used to fill - # the months with no data in the visualizations - months_query = salc.sql.text(f""" - SELECT * - FROM - ( - SELECT - date_part( 'year', created_month :: DATE ) AS year, - date_part( 'month', created_month :: DATE ) AS MONTH - FROM - (SELECT * - FROM ( - SELECT created_month :: DATE - FROM generate_series (TIMESTAMP '{start_date}', TIMESTAMP '{end_date}', INTERVAL '1 month' ) created_month ) d ) x - ) y - """) - - with current_app.engine.connect() as conn: - months_df = pd.read_sql(months_query, conn) - - # add yearmonths to months_df - months_df[['year', 'month']] = months_df[['year', 'month']].astype(float).astype(int).astype(str) - months_df['yearmonth'] = months_df['month'] + '/' + months_df['year'] - months_df['yearmonth'] = pd.to_datetime(months_df['yearmonth']) - - # filter months_df with start_date and end_date, the contributor df is filtered in the visualizations - months_df = months_df.set_index(months_df['yearmonth']) - months_df = months_df.loc[start_date: end_date].reset_index(drop=True) - - # add quarters to months dataframe - months_df['month'] = months_df['month'].astype(int) - months_df['quarter'] = months_df.apply(lambda x: quarters(x['month'], x['year']), axis=1) - months_df['quarter'] = pd.to_datetime(months_df['quarter']) - - return months_df - -def get_repo_id_start_date_and_end_date(): - - now = datetime.datetime.now() - - repo_id = request.args.get('repo_id') - start_date = str(request.args.get('start_date', "{}-01-01".format(now.year - 1))) - end_date = str(request.args.get('end_date', "{}-{}-{}".format(now.year, now.month, now.day))) - - if repo_id: - - if start_date < end_date: - return int(repo_id), start_date, end_date, None - else: 
-
-            error = {
-                "message": "Invalid end_date. end_date is before the start_date",
-                "status_code": 400
-            }
-
-            return int(repo_id), None, None, error
-
-    else:
-        error = {
-            "message": "repo_id not specified. Use this endpoint to get a list of available repos: http:///api/unstable/repos",
-            "status_code": 400
-        }
-        return None, None, None, error
-
-def filter_out_repeats_without_required_contributions_in_required_time(repeat_list, repeats_df, required_time,
-                                                                       first_list):
-
-    differences = []
-    for i in range(0, len(repeat_list)):
-        time_difference = repeat_list[i] - first_list[i]
-        total = time_difference.days * 86400 + time_difference.seconds
-        differences.append(total)
-    repeats_df['differences'] = differences
-
-    # remove contributors who made enough contributions, but not in a short enough time
-    repeats_df = repeats_df.loc[repeats_df['differences'] <= required_time * 86400]
-
-    return repeats_df
-
-def compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, required_time, start_date):
-
-    # create a copy of contributor dataframe
-    driver_df = input_df.copy()
-
-    # remove first time contributors before begin date, along with their second contribution
-    mask = (driver_df['yearmonth'] < start_date)
-    driver_df = driver_df[~driver_df['cntrb_id'].isin(driver_df.loc[mask]['cntrb_id'])]
-
-    # determine if contributor is a drive-by by finding all the cntrb_id's that do not have a second contribution
-    repeats_df = driver_df.copy()
-
-    repeats_df = repeats_df.loc[repeats_df['rank'].isin([1, required_contributions])]
-
-    # removes all the contributors that only have a first contribution
-    repeats_df = repeats_df[
-        repeats_df['cntrb_id'].isin(repeats_df.loc[driver_df['rank'] == required_contributions]['cntrb_id'])]
-
-    repeat_list = repeats_df.loc[driver_df['rank'] == required_contributions]['created_at'].tolist()
-    first_list = repeats_df.loc[driver_df['rank'] == 1]['created_at'].tolist()
-
-    repeats_df = repeats_df.loc[driver_df['rank'] == 1]
-    repeats_df['type'] = 'repeat'
-
-    repeats_df = filter_out_repeats_without_required_contributions_in_required_time(
-        repeat_list, repeats_df, required_time, first_list)
-
-    repeats_df = repeats_df.loc[repeats_df['differences'] <= required_time * 86400]
-
-    repeat_cntrb_ids = repeats_df['cntrb_id'].to_list()
-
-    drive_by_df = driver_df.loc[~driver_df['cntrb_id'].isin(repeat_cntrb_ids)]
-
-    drive_by_df = drive_by_df.loc[driver_df['rank'] == 1]
-    drive_by_df['type'] = 'drive_by'
-
-    return drive_by_df, repeats_df
-
-def add_caption_to_visualizations(caption, required_contributions, required_time, plot_width):
-
-    caption_plot = figure(width=plot_width, height=200, margin=(0, 0, 0, 0))
-
-    caption_plot.add_layout(Label(
-        x=0,
-        y=160,
-        x_units='screen',
-        y_units='screen',
-        text='{}'.format(caption.format(required_contributions, required_time)),
-        text_font='times',
-        text_font_size='15pt',
-        render_mode='css'
-    ))
-    caption_plot.outline_line_color = None
-
-    return caption_plot
-
-def format_new_cntrb_bar_charts(plot, rank, group_by_format_string):
-
-    plot.xgrid.grid_line_color = None
-    plot.y_range.start = 0
-    plot.axis.minor_tick_line_color = None
-    plot.outline_line_color = None
-
-    plot.title.align = "center"
-    plot.title.text_font_size = "18px"
-
-    plot.yaxis.axis_label = 'Second Time Contributors' if rank == 2 else 'New Contributors'
-    plot.xaxis.axis_label = group_by_format_string
-
-    plot.xaxis.axis_label_text_font_size = "18px"
-    plot.yaxis.axis_label_text_font_size = "16px"
-
-    plot.xaxis.major_label_text_font_size
= "16px" - plot.xaxis.major_label_orientation = 45.0 - - plot.yaxis.major_label_text_font_size = "16px" - - return plot - -def add_charts_and_captions_to_correct_positions(chart_plot, caption_plot, rank, contributor_type, - row_1, row_2, row_3, row_4): - - if rank == 1 and (contributor_type == 'All' or contributor_type == 'repeat'): - row_1.append(chart_plot) - row_2.append(caption_plot) - elif rank == 2 or contributor_type == 'drive_by': - row_3.append(chart_plot) - row_4.append(caption_plot) - -def get_new_cntrb_bar_chart_query_params(): - - group_by = str(request.args.get('group_by', "quarter")) - required_contributions = int(request.args.get('required_contributions', 4)) - required_time = int(request.args.get('required_time', 365)) - - return group_by, required_contributions, required_time - -def remove_rows_before_start_date(df, start_date): - - mask = (df['yearmonth'] < start_date) - result_df = df[~df['cntrb_id'].isin(df.loc[mask]['cntrb_id'])] - - return result_df - -def remove_rows_with_null_values(df, not_null_columns=[]): - """Remove null data from pandas df - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- list_of_columns - description: columns that are searched for NULL values - type: list - default: [] (means all columns will be checked for NULL values) - IMPORTANT: if an empty list is passed or nothing is passed it will check all columns for NULL values - - Return Value - -- Modified Pandas Dataframe - """ - - if len(not_null_columns) == 0: - not_null_columns = df.columns.to_list() - - total_rows_removed = 0 - for col in not_null_columns: - rows_removed = len(df.loc[df[col].isnull() is True]) - - if rows_removed > 0: - print(f"{rows_removed} rows have been removed because of null values in column {col}") - total_rows_removed += rows_removed - - df = df.loc[df[col].isnull() is False] - - if total_rows_removed > 0: - print(f"\nTotal rows removed because of null data: {total_rows_removed}"); - else: - print("No null data found") - - return df - -def get_needed_columns(df, list_of_columns): - """Get only a specific list of columns from a Pandas Dataframe - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- list_of_columns - description: columns that will be kept in dataframe - type: list - - Return Value - -- Modified Pandas Dataframe - """ - return df[list_of_columns] - -def filter_data(df, needed_columns, not_null_columns=[]): - """Filters out the unneeded rows in the df, and removed NULL data from df - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- needed_columns - description: the columns to keep in the dataframe - - -- not_null_columns - description: columns that will be searched for NULL data, - if NULL values are found those rows will be removed - default: [] (means all columns in needed_columns list will be checked for NULL values) - IMPORTANT: if an empty list is passed or nothing is passed it will check - all columns in needed_columns list for NULL values - Return Value - -- Modified Pandas Dataframe - """ - - if all(x in needed_columns for x in not_null_columns): - - df = get_needed_columns(df, needed_columns) - #Use the pandas method bc the other method was erroring on boolean index. 
- #IM - 9/23/22 - df = df.dropna(subset=not_null_columns)#remove_rows_with_null_values(df, not_null_columns) - - return df - else: - print("Developer error, not null columns should be a subset of needed columns") - return df - -@app.route('/{}/contributor_reports/new_contributors_bar/'.format(AUGUR_API_VERSION), methods=["GET"]) -def new_contributors_bar(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by, required_contributions, required_time = get_new_cntrb_bar_chart_query_params() - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - months_df = months_data_collection(start_date=start_date, end_date=end_date) - - # TODO remove full_name from data for all charts since it is not needed in vis generation - not_null_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - #Use the pandas method bc the other method was erroring on boolean index. - #IM - 9/23/22 - input_df = input_df.dropna(subset=not_null_columns)#remove_rows_with_null_values(input_df, not_null_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - contributor_types = ['All', 'repeat', 'drive_by'] - ranks = [1, 2] - - row_1, row_2, row_3, row_4 = [], [], [], [] - - all_df = remove_rows_before_start_date(input_df, start_date) - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - for rank in ranks: - for contributor_type in contributor_types: - - # do not display these visualizations since drive-by's do not have second contributions, and the - # second contribution of a repeat contributor is the same thing as the all the second time contributors - if (rank == 2 and contributor_type == 'drive_by') or (rank == 2 and contributor_type == 'repeat'): - continue - - if contributor_type == 'repeat': - driver_df = repeats_df - - caption = """This graph shows repeat contributors in the specified time period. Repeat contributors - are contributors who have made {} or more contributions in {} days and their first contribution is - in the specified time period. New contributors are individuals who make their first contribution - in the specified time period.""" - - elif contributor_type == 'drive_by': - - driver_df = drive_by_df - - caption = """This graph shows fly by contributors in the specified time period. Fly by contributors - are contributors who make less than the required {} contributions in {} days. New contributors are - individuals who make their first contribution in the specified time period. Of course, then, “All - fly-by’s are by definition first time contributors”. However, not all first time contributors are - fly-by’s.""" - - elif contributor_type == 'All': - - if rank == 1: - driver_df = all_df - # makes df with all first time contributors - driver_df = driver_df.loc[driver_df['rank'] == 1] - caption = """This graph shows all the first time contributors, whether they contribute once, or - contribute multiple times. 
New contributors are individuals who make their first contribution
-                    in the specified time period."""
-
-                if rank == 2:
-
-                    driver_df = all_df
-
-                    # creates df with all second time contributors
-                    driver_df = driver_df.loc[driver_df['rank'] == 2]
-                    caption = """This graph shows the second contribution of all
-                    first time contributors in the specified time period."""
-                    # y_axis_label = 'Second Time Contributors'
-
-            # filter by end_date, this is not done with the begin date filtering because a repeat contributor
-            # will look like drive-by if the second contribution is removed by end_date filtering
-            mask = (driver_df['yearmonth'] < end_date)
-            driver_df = driver_df.loc[mask]
-
-            # adds all months to driver_df so the lists of dates will include all months and years
-            driver_df = pd.concat([driver_df, months_df])
-
-            data = pd.DataFrame()
-            if group_by == 'year':
-
-                data['dates'] = driver_df[group_by].unique()
-
-                # new contributor counts for y-axis
-                data['new_contributor_counts'] = driver_df.groupby([group_by]).sum().reset_index()[
-                    'new_contributors']
-
-                # used to format x-axis and title
-                group_by_format_string = "Year"
-
-            elif group_by == 'quarter' or group_by == 'month':
-
-                # set variables to group the data by quarter or month
-                if group_by == 'quarter':
-                    date_column = 'quarter'
-                    group_by_format_string = "Quarter"
-
-                elif group_by == 'month':
-                    date_column = 'yearmonth'
-                    group_by_format_string = "Month"
-
-                # modifies the driver_df[date_column] to be a string with year and month,
-                # then finds all the unique values
-                data['dates'] = np.unique(np.datetime_as_string(driver_df[date_column], unit='M'))
-
-                # new contributor counts for y-axis
-                data['new_contributor_counts'] = driver_df.groupby([date_column]).sum().reset_index()[
-                    'new_contributors']
-
-            # if the data set is large enough it will dynamically assign the width, if the data set is
-            # too small it will by default set to 870 pixel so the title fits
-            if len(data['new_contributor_counts']) >= 15:
-                plot_width = 46 * len(data['new_contributor_counts'])
-            else:
-                plot_width = 870
-
-            # create a dict to convert an integer number into a word
-            # used to turn the rank into a word, so it is nicely displayed in the title
-            numbers = ['Zero', 'First', 'Second']
-            num_conversion_dict = {}
-            for i in range(1, len(numbers)):
-                num_conversion_dict[i] = numbers[i]
-            number = '{}'.format(num_conversion_dict[rank])
-
-            # define plot for bar chart
-            p = figure(x_range=data['dates'], plot_height=400, plot_width=plot_width,
-                       title="{}: {} {} Time Contributors Per {}".format(repo_dict[repo_id],
-                                                                         contributor_type.capitalize(), number,
-                                                                         group_by_format_string),
-                       y_range=(0, max(data['new_contributor_counts']) * 1.15), margin=(0, 0, 10, 0))
-
-            p.vbar(x=data['dates'], top=data['new_contributor_counts'], width=0.8)
-
-            source = ColumnDataSource(
-                data=dict(dates=data['dates'], new_contributor_counts=data['new_contributor_counts']))
-
-            # add contributor_count labels to chart
-            p.add_layout(LabelSet(x='dates', y='new_contributor_counts', text='new_contributor_counts', y_offset=4,
-                                  text_font_size="13pt", text_color="black",
-                                  source=source, text_align='center'))
-
-            plot = format_new_cntrb_bar_charts(p, rank, group_by_format_string)
-
-            caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width)
-
-            add_charts_and_captions_to_correct_positions(plot, caption_plot, rank, contributor_type, row_1,
-                                                         row_2, row_3, row_4)
-
-    # puts plots together into a grid
-    grid = gridplot([row_1, row_2, row_3,
row_4]) - - filename = export_png(grid) - - return send_file(filename) - -@app.route('/{}/contributor_reports/new_contributors_stacked_bar/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def new_contributors_stacked_bar(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by, required_contributions, required_time = get_new_cntrb_bar_chart_query_params() - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - months_df = months_data_collection(start_date=start_date, end_date=end_date) - - needed_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - contributor_types = ['All', 'repeat', 'drive_by'] - ranks = [1, 2] - - row_1, row_2, row_3, row_4 = [], [], [], [] - - all_df = remove_rows_before_start_date(input_df, start_date) - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - for rank in ranks: - for contributor_type in contributor_types: - # do not display these visualizations since drive-by's do not have second contributions, - # and the second contribution of a repeat contributor is the same thing as the all the - # second time contributors - if (rank == 2 and contributor_type == 'drive_by') or (rank == 2 and contributor_type == 'repeat'): - continue - - if contributor_type == 'repeat': - driver_df = repeats_df - - caption = """This graph shows repeat contributors in the specified time period. Repeat contributors - are contributors who have made {} or more contributions in {} days and their first contribution is - in the specified time period. New contributors are individuals who make their first contribution in - the specified time period.""" - - elif contributor_type == 'drive_by': - - driver_df = drive_by_df - - caption = """This graph shows fly by contributors in the specified time period. Fly by contributors - are contributors who make less than the required {} contributions in {} days. New contributors are - individuals who make their first contribution in the specified time period. Of course, then, “All - fly-by’s are by definition first time contributors”. However, not all first time contributors are - fly-by’s.""" - - elif contributor_type == 'All': - if rank == 1: - driver_df = all_df - - # makes df with all first time contributors - driver_df = driver_df.loc[driver_df['rank'] == 1] - - caption = """This graph shows all the first time contributors, whether they contribute once, or - contribute multiple times. 
New contributors are individuals who make their first contribution in - the specified time period.""" - - if rank == 2: - driver_df = all_df - - # creates df with all second time contributor - driver_df = driver_df.loc[driver_df['rank'] == 2] - caption = """This graph shows the second contribution of all first time - contributors in the specified time period.""" - # y_axis_label = 'Second Time Contributors' - - # filter by end_date, this is not done with the begin date filtering because a repeat contributor will - # look like drive-by if the second contribution is removed by end_date filtering - mask = (driver_df['yearmonth'] < end_date) - driver_df = driver_df.loc[mask] - - # adds all months to driver_df so the lists of dates will include all months and years - driver_df = pd.concat([driver_df, months_df]) - - actions = ['open_pull_request', 'pull_request_comment', 'commit', 'issue_closed', 'issue_opened', - 'issue_comment'] - - data = pd.DataFrame() - if group_by == 'year': - - # x-axis dates - data['dates'] = driver_df[group_by].unique() - - for contribution_type in actions: - data[contribution_type] = \ - pd.concat([driver_df.loc[driver_df['action'] == contribution_type], months_df]).groupby( - group_by).sum().reset_index()['new_contributors'] - - # new contributor counts for all actions - data['new_contributor_counts'] = driver_df.groupby([group_by]).sum().reset_index()[ - 'new_contributors'] - - # used to format x-axis and graph title - group_by_format_string = "Year" - - elif group_by == 'quarter' or group_by == 'month': - - # set variables to group the data by quarter or month - if group_by == 'quarter': - date_column = 'quarter' - group_by_format_string = "Quarter" - - elif group_by == 'month': - date_column = 'yearmonth' - group_by_format_string = "Month" - - # modifies the driver_df[date_column] to be a string with year and month, - # then finds all the unique values - data['dates'] = np.unique(np.datetime_as_string(driver_df[date_column], unit='M')) - - # new_contributor counts for each type of action - for contribution_type in actions: - data[contribution_type] = \ - pd.concat([driver_df.loc[driver_df['action'] == contribution_type], months_df]).groupby( - date_column).sum().reset_index()['new_contributors'] - - print(data.to_string()) - - # new contributor counts for all actions - data['new_contributor_counts'] = driver_df.groupby([date_column]).sum().reset_index()[ - 'new_contributors'] - - # if the data set is large enough it will dynamically assign the width, if the data set is too small it - # will by default set to 870 pixel so the title fits - if len(data['new_contributor_counts']) >= 15: - plot_width = 46 * len(data['new_contributor_counts']) + 200 - else: - plot_width = 870 - - # create list of values for data source dict - actions_df_references = [] - for action in actions: - actions_df_references.append(data[action]) - - # created dict with the actions as the keys, and the values as the values from the df - data_source = {actions[i]: actions_df_references[i] for i in range(len(actions))} - data_source.update({'dates': data['dates'], 'New Contributor Counts': data['new_contributor_counts']}) - - colors = Colorblind[len(actions)] - - source = ColumnDataSource(data=data_source) - - # create a dict convert an integer number into a word - # used to turn the rank into a word, so it is nicely displayed in the title - numbers = ['Zero', 'First', 'Second'] - num_conversion_dict = {} - for i in range(1, len(numbers)): - num_conversion_dict[i] = numbers[i] - number = 
'{}'.format(num_conversion_dict[rank]) - - # y_max = 20 - # creates plot to hold chart - p = figure(x_range=data['dates'], plot_height=400, plot_width=plot_width, - title='{}: {} {} Time Contributors Per {}'.format(repo_dict[repo_id], - contributor_type.capitalize(), number, - group_by_format_string), - toolbar_location=None, y_range=(0, max(data['new_contributor_counts']) * 1.15)) - # max(data['new_contributor_counts'])* 1.15), margin = (0, 0, 0, 0)) - - vbar = p.vbar_stack(actions, x='dates', width=0.8, color=colors, source=source) - - # add total count labels - p.add_layout(LabelSet(x='dates', y='New Contributor Counts', text='New Contributor Counts', y_offset=4, - text_font_size="14pt", - text_color="black", source=source, text_align='center')) - - # add legend - legend = Legend(items=[(date, [action]) for (date, action) in zip(actions, vbar)], location=(0, 120), - label_text_font_size="16px") - p.add_layout(legend, 'right') - - plot = format_new_cntrb_bar_charts(p, rank, group_by_format_string) - - caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width) - - add_charts_and_captions_to_correct_positions(plot, caption_plot, rank, contributor_type, row_1, - row_2, row_3, row_4) - - # puts plots together into a grid - grid = gridplot([row_1, row_2, row_3, row_4]) - - filename = export_png(grid) - - return send_file(filename) - -@app.route('/{}/contributor_reports/returning_contributors_pie_chart/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def returning_contributors_pie_chart(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - required_contributions = int(request.args.get('required_contributions', 4)) - required_time = int(request.args.get('required_time', 365)) - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - - needed_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - print(repeats_df.to_string()) - - driver_df = pd.concat([drive_by_df, repeats_df]) - - # filter df by end date - mask = (driver_df['yearmonth'] < end_date) - driver_df = driver_df.loc[mask] - - # first and second time contributor counts - drive_by_contributors = driver_df.loc[driver_df['type'] == 'drive_by'].count()['new_contributors'] - repeat_contributors = driver_df.loc[driver_df['type'] == 'repeat'].count()['new_contributors'] - - # create a dict with the # of drive-by and repeat contributors - x = {'Drive_By': drive_by_contributors, - 'Repeat': repeat_contributors} - - # turn dict 'x' into a dataframe with columns 'contributor_type', and 'counts' - data = pd.Series(x).reset_index(name='counts').rename(columns={'index': 'contributor_type'}) - - data['angle'] = data['counts'] / data['counts'].sum() * 2 * pi - data['color'] = ('#0072B2', '#E69F00') - data['percentage'] = ((data['angle'] / (2 * pi)) * 
100).round(2) - - # format title - title = "{}: Number of Returning " \ - "Contributors out of {} from {} to {}" \ - .format(repo_dict[repo_id], drive_by_contributors + repeat_contributors, start_date, end_date) - - title_text_font_size = 18 - - plot_width = 850 - - # sets plot_width to width of title if title is wider than 850 pixels - if len(title) * title_text_font_size / 2 > plot_width: - plot_width = int(len(title) * title_text_font_size / 2) - - # creates plot for chart - p = figure(plot_height=450, plot_width=plot_width, title=title, - toolbar_location=None, x_range=(-0.5, 1.3), tools='hover', tooltips="@contributor_type", - margin=(0, 0, 0, 0)) - - p.wedge(x=0.87, y=1, radius=0.4, start_angle=cumsum('angle', include_zero=True), - end_angle=cumsum('angle'), line_color=None, fill_color='color', - legend_field='contributor_type', source=data) - - start_point = 0.88 - for i in range(0, len(data['percentage'])): - # percentages - p.add_layout(Label(x=-0.17, y=start_point + 0.13 * (len(data['percentage']) - 1 - i), - text='{}%'.format(data.iloc[i]['percentage']), - render_mode='css', text_font_size='15px', text_font_style='bold')) - - # contributors - p.add_layout(Label(x=0.12, y=start_point + 0.13 * (len(data['percentage']) - 1 - i), - text='{}'.format(data.iloc[i]['counts']), - render_mode='css', text_font_size='15px', text_font_style='bold')) - - # percentages header - p.add_layout( - Label(x=-0.22, y=start_point + 0.13 * (len(data['percentage'])), text='Percentages', render_mode='css', - text_font_size='15px', text_font_style='bold')) - - # legend header - p.add_layout( - Label(x=-0.43, y=start_point + 0.13 * (len(data['percentage'])), text='Category', render_mode='css', - text_font_size='15px', text_font_style='bold')) - - # contributors header - p.add_layout( - Label(x=0, y=start_point + 0.13 * (len(data['percentage'])), text='# Contributors', render_mode='css', - text_font_size='15px', text_font_style='bold')) - - p.axis.axis_label = None - p.axis.visible = False - p.grid.grid_line_color = None - - p.title.align = "center" - p.title.text_font_size = "{}px".format(title_text_font_size) - - p.legend.location = "center_left" - p.legend.border_line_color = None - p.legend.label_text_font_style = 'bold' - p.legend.label_text_font_size = "15px" - - plot = p - - caption = """This pie chart shows the percentage of new contributors who were fly-by or repeat contributors. - Fly by contributors are contributors who make less than the required {0} contributions in {1} days. - New contributors are individuals who make their first contribution in the specified time period. 
- Repeat contributors are contributors who have made {0} or more contributions in {1} days and their - first contribution is in the specified time period.""" - - caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width) - - # put graph and caption plot together into one grid - grid = gridplot([[plot], [caption_plot]]) - - filename = export_png(grid) - - return send_file(filename) - -@app.route('/{}/contributor_reports/returning_contributors_stacked_bar/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def returning_contributors_stacked_bar(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by = str(request.args.get('group_by', "quarter")) - required_contributions = int(request.args.get('required_contributions', 4)) - required_time = int(request.args.get('required_time', 365)) - - input_df = new_contributor_data_collection(repo_id=repo_id, required_contributions=required_contributions) - months_df = months_data_collection(start_date=start_date, end_date=end_date) - - needed_columns = ['cntrb_id', 'created_at', 'month', 'year', 'repo_id', 'repo_name', 'login', 'action', - 'rank', 'yearmonth', 'new_contributors', 'quarter'] - - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - drive_by_df, repeats_df = compute_fly_by_and_returning_contributors_dfs(input_df, required_contributions, - required_time, start_date) - - driver_df = pd.concat([drive_by_df, repeats_df, months_df]) - - # filter by end_date - mask = (driver_df['yearmonth'] < end_date) - driver_df = driver_df.loc[mask] - - # create df to hold data needed for chart - data = pd.DataFrame() - if group_by == 'year': - - # x-axis dates - data['dates'] = driver_df[group_by].unique() - - data['repeat_counts'] = \ - driver_df.loc[driver_df['type'] == 'repeat'].groupby(group_by).count().reset_index()['new_contributors'] - data['drive_by_counts'] = \ - driver_df.loc[driver_df['type'] == 'drive_by'].groupby(group_by).count().reset_index()[ - 'new_contributors'] - - # new contributor counts for all contributor counts - total_counts = [] - for i in range(0, len(data['drive_by_counts'])): - total_counts.append(data.iloc[i]['drive_by_counts'] + data.iloc[i]['repeat_counts']) - data['total_counts'] = total_counts - - # used to format x-axis and graph title - group_by_format_string = "Year" - - # font size of drive by and repeat labels - label_text_font_size = "14pt" - - elif group_by == 'quarter' or group_by == 'month': - - # set variables to group the data by quarter or month - if group_by == 'quarter': - date_column = 'quarter' - group_by_format_string = "Quarter" - - elif group_by == 'month': - date_column = 'yearmonth' - group_by_format_string = "Month" - - # modifies the driver_df[date_column] to be a string with year and month, then finds all the unique values - data['dates'] = np.unique(np.datetime_as_string(driver_df[date_column], unit='M')) - data['drive_by_counts'] = pd.concat([driver_df.loc[driver_df['type'] == 'drive_by'], months_df]).groupby( - date_column).sum().reset_index()['new_contributors'] - data['repeat_counts'] = pd.concat([driver_df.loc[driver_df['type'] == 
'repeat'], months_df]).groupby( - date_column).sum().reset_index()['new_contributors'] - - # new contributor counts for all contributor types - total_counts = [] - for i in range(0, len(data['drive_by_counts'])): - total_counts.append(data.iloc[i]['drive_by_counts'] + data.iloc[i]['repeat_counts']) - data['total_counts'] = total_counts - - # font size of drive by and repeat labels - label_text_font_size = "13pt" - - data_source = {'Dates': data['dates'], - 'Fly By': data['drive_by_counts'], - 'Repeat': data['repeat_counts'], - 'All': data['total_counts']} - - groups = ["Fly By", "Repeat"] - - colors = ['#56B4E9', '#E69F00'] - - source = ColumnDataSource(data=data_source) - - # format title - title_text_font_size = 18 - - # if the data set is large enough it will dynamically assign the width, if the data set - # is too small it will by default set to 780 pixel so the title fits - if len(data['total_counts']) >= 13: - plot_width = 46 * len(data['total_counts']) + 210 - else: - plot_width = 780 - - p = figure(x_range=data['dates'], plot_height=500, plot_width=plot_width, - title="{}: Fly By and Repeat Contributor Counts per {}".format(repo_dict[repo_id], - group_by_format_string), - toolbar_location=None, y_range=(0, max(total_counts) * 1.15), margin=(0, 0, 0, 0)) - - vbar = p.vbar_stack(groups, x='Dates', width=0.8, color=colors, source=source) - - # add total counts above bars - p.add_layout(LabelSet(x='Dates', y='All', text='All', y_offset=8, text_font_size="14pt", - text_color="black", source=source, text_align='center')) - - # add drive by count labels - p.add_layout(LabelSet(x='Dates', y='Fly By', text='Fly By', y_offset=-22, text_font_size=label_text_font_size, - text_color="black", source=source, text_align='center')) - - # add repeat count labels - p.add_layout(LabelSet(x='Dates', y='All', text='Repeat', y_offset=-22, text_font_size=label_text_font_size, - text_color="black", source=source, text_align='center')) - - # add legend - legend = Legend(items=[(date, [group]) for (date, group) in zip(groups, vbar)], location=(0, 200), - label_text_font_size="16px") - p.add_layout(legend, 'right') - - p.xgrid.grid_line_color = None - p.y_range.start = 0 - p.axis.minor_tick_line_color = None - p.outline_line_color = None - - p.title.align = "center" - p.title.text_font_size = "{}px".format(title_text_font_size) - - p.yaxis.axis_label = '# Contributors' - p.xaxis.axis_label = group_by_format_string - - p.xaxis.axis_label_text_font_size = "18px" - p.yaxis.axis_label_text_font_size = "16px" - - p.xaxis.major_label_text_font_size = "16px" - p.xaxis.major_label_orientation = 45.0 - - p.yaxis.major_label_text_font_size = "16px" - - p.legend.label_text_font_size = "20px" - - plot = p - - caption = """This graph shows the number of new contributors in the specified time period, and indicates how - many were fly-by and repeat contributors. Fly by contributors are contributors who make less than the required - {0} contributions in {1} days. New contributors are individuals who make their first contribution in the - specified time period. 
Repeat contributors are contributors who have made {0} or more contributions in {1} - days and their first contribution is in the specified time period.""" - - caption_plot = add_caption_to_visualizations(caption, required_contributions, required_time, plot_width) - - # put graph and caption plot together into one grid - grid = gridplot([[plot], [caption_plot]]) - - filename = export_png(grid) - - return send_file(filename) diff --git a/augur/api/routes/pull_request_reports.py b/augur/api/routes/pull_request_reports.py deleted file mode 100644 index 13aea31e8d..0000000000 --- a/augur/api/routes/pull_request_reports.py +++ /dev/null @@ -1,1922 +0,0 @@ -# import psycopg2 -import pandas as pd -import sqlalchemy as salc -import numpy as np -import warnings -import datetime -import json -# from scipy import stats -from flask import request, send_file, Response, current_app -import math - -from bokeh.palettes import Colorblind, mpl, Category20 -from bokeh.layouts import gridplot, column -from bokeh.models.annotations import Title -from bokeh.io import export_png, show # get_screenshot_as_png -# from bokeh.io.export import get_screenshot_as_png -from bokeh.embed import json_item -from bokeh.models import ColumnDataSource, Legend, LabelSet, Range1d, Label, FactorRange, BasicTicker, ColorBar, \ - LinearColorMapper, PrintfTickFormatter -from bokeh.plotting import figure -from bokeh.models.glyphs import Rect -from bokeh.transform import dodge, factor_cmap, transform - -# from selenium.webdriver import Firefox, FirefoxOptions -# options = FirefoxOptions() -# options.headless = True -# webdriver = Firefox(options=options) -#export_png(item, path, webdriver=webdriver) - -warnings.filterwarnings('ignore') - -from augur.api.routes import AUGUR_API_VERSION -from ..server import app - -def pull_request_data_collection(repo_id, start_date, end_date): - - pr_query = salc.sql.text(f""" - SELECT - repo.repo_id AS repo_id, - pull_requests.pr_src_id AS pr_src_id, - repo.repo_name AS repo_name, - pr_src_author_association, - repo_groups.rg_name AS repo_group, - pull_requests.pr_src_state, - pull_requests.pr_merged_at, - pull_requests.pr_created_at AS pr_created_at, - pull_requests.pr_closed_at AS pr_closed_at, - date_part( 'year', pr_created_at :: DATE ) AS CREATED_YEAR, - date_part( 'month', pr_created_at :: DATE ) AS CREATED_MONTH, - date_part( 'year', pr_closed_at :: DATE ) AS CLOSED_YEAR, - date_part( 'month', pr_closed_at :: DATE ) AS CLOSED_MONTH, - pr_src_meta_label, - pr_head_or_base, - ( EXTRACT ( EPOCH FROM pull_requests.pr_closed_at ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 3600 AS hours_to_close, - ( EXTRACT ( EPOCH FROM pull_requests.pr_closed_at ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 86400 AS days_to_close, - ( EXTRACT ( EPOCH FROM first_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 3600 AS hours_to_first_response, - ( EXTRACT ( EPOCH FROM first_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 86400 AS days_to_first_response, - ( EXTRACT ( EPOCH FROM last_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 3600 AS hours_to_last_response, - ( EXTRACT ( EPOCH FROM last_response_time ) - EXTRACT ( EPOCH FROM pull_requests.pr_created_at ) ) / 86400 AS days_to_last_response, - first_response_time, - last_response_time, - EXTRACT ( EPOCH FROM average_time_between_responses), - assigned_count, - review_requested_count, - labeled_count, - subscribed_count, - mentioned_count, - referenced_count, - 
closed_count, - head_ref_force_pushed_count, - merged_count::INT, - milestoned_count, - unlabeled_count, - head_ref_deleted_count, - comment_count, - COALESCE(lines_added, 0) as lines_added, - COALESCE(lines_removed, 0) as lines_removed, - commit_count, - COALESCE(file_count, 0) as file_count - FROM - repo, - repo_groups, - pull_requests LEFT OUTER JOIN ( - SELECT pull_requests.pull_request_id, - count(*) FILTER (WHERE action = 'assigned') AS assigned_count, - count(*) FILTER (WHERE action = 'review_requested') AS review_requested_count, - count(*) FILTER (WHERE action = 'labeled') AS labeled_count, - count(*) FILTER (WHERE action = 'unlabeled') AS unlabeled_count, - count(*) FILTER (WHERE action = 'subscribed') AS subscribed_count, - count(*) FILTER (WHERE action = 'mentioned') AS mentioned_count, - count(*) FILTER (WHERE action = 'referenced') AS referenced_count, - count(*) FILTER (WHERE action = 'closed') AS closed_count, - count(*) FILTER (WHERE action = 'head_ref_force_pushed') AS head_ref_force_pushed_count, - count(*) FILTER (WHERE action = 'head_ref_deleted') AS head_ref_deleted_count, - count(*) FILTER (WHERE action = 'milestoned') AS milestoned_count, - COALESCE(count(*) FILTER (WHERE action = 'merged'), 0) AS merged_count, - COALESCE(MIN(message.msg_timestamp), pull_requests.pr_merged_at, pull_requests.pr_closed_at) AS first_response_time, - COALESCE(COUNT(DISTINCT message.msg_timestamp), 0) AS comment_count, - COALESCE(MAX(message.msg_timestamp), pull_requests.pr_closed_at) AS last_response_time, - COALESCE((MAX(message.msg_timestamp) - MIN(message.msg_timestamp)) / COUNT(DISTINCT message.msg_timestamp), pull_requests.pr_created_at - pull_requests.pr_closed_at) AS average_time_between_responses - FROM pull_requests - LEFT OUTER JOIN pull_request_events on pull_requests.pull_request_id = pull_request_events.pull_request_id - JOIN repo on repo.repo_id = pull_requests.repo_id - LEFT OUTER JOIN pull_request_message_ref on pull_requests.pull_request_id = pull_request_message_ref.pull_request_id - LEFT OUTER JOIN message on pull_request_message_ref.msg_id = message.msg_id - WHERE repo.repo_id = {repo_id} - GROUP BY pull_requests.pull_request_id - ) response_times - ON pull_requests.pull_request_id = response_times.pull_request_id - LEFT JOIN ( - SELECT pull_request_commits.pull_request_id, count(DISTINCT pr_cmt_sha) AS commit_count - FROM pull_request_commits, pull_requests, pull_request_meta - WHERE pull_requests.pull_request_id = pull_request_commits.pull_request_id - AND pull_requests.pull_request_id = pull_request_meta.pull_request_id - AND pull_requests.repo_id = {repo_id} - AND pr_cmt_sha <> pull_requests.pr_merge_commit_sha - AND pr_cmt_sha <> pull_request_meta.pr_sha - GROUP BY pull_request_commits.pull_request_id - ) all_commit_counts - ON pull_requests.pull_request_id = all_commit_counts.pull_request_id - LEFT JOIN ( - SELECT MAX(pr_repo_meta_id), pull_request_meta.pull_request_id, pr_head_or_base, pr_src_meta_label - FROM pull_requests, pull_request_meta - WHERE pull_requests.pull_request_id = pull_request_meta.pull_request_id - AND pull_requests.repo_id = {repo_id} - AND pr_head_or_base = 'base' - GROUP BY pull_request_meta.pull_request_id, pr_head_or_base, pr_src_meta_label - ) base_labels - ON base_labels.pull_request_id = all_commit_counts.pull_request_id - LEFT JOIN ( - SELECT sum(cmt_added) AS lines_added, sum(cmt_removed) AS lines_removed, pull_request_commits.pull_request_id, count(DISTINCT cmt_filename) AS file_count - FROM pull_request_commits, commits, 
pull_requests, pull_request_meta - WHERE cmt_commit_hash = pr_cmt_sha - AND pull_requests.pull_request_id = pull_request_commits.pull_request_id - AND pull_requests.pull_request_id = pull_request_meta.pull_request_id - AND pull_requests.repo_id = {repo_id} - AND commits.repo_id = pull_requests.repo_id - AND commits.cmt_commit_hash <> pull_requests.pr_merge_commit_sha - AND commits.cmt_commit_hash <> pull_request_meta.pr_sha - GROUP BY pull_request_commits.pull_request_id - ) master_merged_counts - ON base_labels.pull_request_id = master_merged_counts.pull_request_id - WHERE - repo.repo_group_id = repo_groups.repo_group_id - AND repo.repo_id = pull_requests.repo_id - AND repo.repo_id = {repo_id} - ORDER BY - merged_count DESC - """) - - with current_app.engine.connect() as conn: - pr_all = pd.read_sql(pr_query, conn) - - pr_all[['assigned_count', - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'mentioned_count', - 'referenced_count', - 'closed_count', - 'head_ref_force_pushed_count', - 'merged_count', - 'milestoned_count', - 'unlabeled_count', - 'head_ref_deleted_count', - 'comment_count', - 'commit_count', - 'file_count', - 'lines_added', - 'lines_removed' - ]] = pr_all[['assigned_count', - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'mentioned_count', - 'referenced_count', - 'closed_count', - 'head_ref_force_pushed_count', - 'merged_count', - 'milestoned_count', - 'unlabeled_count', - 'head_ref_deleted_count', - 'comment_count', - 'commit_count', - 'file_count', - 'lines_added', - 'lines_removed' - ]].astype(float) - # Change years to int so that doesn't display as 2019.0 for example - pr_all[['created_year', 'closed_year']] = pr_all[['created_year', 'closed_year']].fillna(-1).astype(int).astype( - str) - - start_date = pd.to_datetime(start_date) - # end_date = pd.to_datetime('2020-02-01 09:00:00') - end_date = pd.to_datetime(end_date) - pr_all = pr_all[(pr_all['pr_created_at'] > start_date) & (pr_all['pr_closed_at'] < end_date)] - - pr_all['created_year'] = pr_all['created_year'].map(int) - pr_all['created_month'] = pr_all['created_month'].map(int) - pr_all['created_month'] = pr_all['created_month'].map(lambda x: '{0:0>2}'.format(x)) - pr_all['created_yearmonth'] = pd.to_datetime( - pr_all['created_year'].map(str) + '-' + pr_all['created_month'].map(str) + '-01') - - # getting the number of days of (today - created at) for the PRs that are still open - # and putting this in the days_to_close column - - # get timedeltas of creation time to todays date/time - days_to_close_open_pr = datetime.datetime.now() - pr_all.loc[pr_all['pr_src_state'] == 'open']['pr_created_at'] - - # get num days from above timedelta - days_to_close_open_pr = days_to_close_open_pr.apply(lambda x: x.days).astype(int) - - # for only OPEN pr's, set the days_to_close column equal to above dataframe - pr_all.loc[pr_all['pr_src_state'] == 'open'] = pr_all.loc[pr_all['pr_src_state'] == 'open'].assign( - days_to_close=days_to_close_open_pr) - - pr_all.loc[pr_all['pr_src_state'] == 'open'].head() - - # initiate column by setting all null datetimes - pr_all['closed_yearmonth'] = pd.to_datetime(np.nan) - - # Fill column with prettified string of year/month closed that looks like: 2019-07-01 - pr_all.loc[pr_all['pr_src_state'] == 'closed'] = pr_all.loc[pr_all['pr_src_state'] == 'closed'].assign( - closed_yearmonth=pd.to_datetime(pr_all.loc[pr_all['pr_src_state'] == 'closed']['closed_year'].astype(int - ).map( - str) + '-' + pr_all.loc[pr_all['pr_src_state'] == 
'closed']['closed_month'].astype(int).map( - str) + '-01')) - - """ Merged flag """ - if 'pr_merged_at' in pr_all.columns.values: - pr_all['pr_merged_at'] = pr_all['pr_merged_at'].fillna(0) - pr_all['merged_flag'] = 'Not Merged / Rejected' - pr_all['merged_flag'].loc[pr_all['pr_merged_at'] != 0] = 'Merged / Accepted' - pr_all['merged_flag'].loc[pr_all['pr_src_state'] == 'open'] = 'Still Open' - del pr_all['pr_merged_at'] - - # Isolate the different state PRs for now - pr_open = pr_all.loc[pr_all['pr_src_state'] == 'open'] - pr_closed = pr_all.loc[pr_all['pr_src_state'] == 'closed'] - pr_merged = pr_all.loc[pr_all['merged_flag'] == 'Merged / Accepted'] - pr_not_merged = pr_all.loc[pr_all['merged_flag'] == 'Not Merged / Rejected'] - - # Filtering the 80th percentile slowest PRs - def filter_20_per_slowest(input_df): - pr_slow20_filtered = pd.DataFrame() - pr_slow20_x = pd.DataFrame() - pr_slow20_filtered = input_df.copy() - pr_slow20_filtered['percentile_rank_local'] = pr_slow20_filtered.days_to_close.rank(pct=True) - pr_slow20_filtered = pr_slow20_filtered.query('percentile_rank_local >= .8', ) - - return pr_slow20_filtered - - pr_slow20_open = filter_20_per_slowest(pr_open) - pr_slow20_closed = filter_20_per_slowest(pr_closed) - pr_slow20_merged = filter_20_per_slowest(pr_merged) - pr_slow20_not_merged = filter_20_per_slowest(pr_not_merged) - pr_slow20_all = filter_20_per_slowest(pr_all) - - return pr_all, pr_open, pr_closed, pr_merged, pr_not_merged, pr_slow20_all, pr_slow20_open, pr_slow20_closed, pr_slow20_merged, pr_slow20_not_merged - -def remove_outliers(input_df, field, num_outliers_repo_map): - df_no_outliers = input_df.copy() - for repo_name, num_outliers in num_outliers_repo_map.items(): - indices_to_drop = input_df.loc[input_df['repo_name'] == repo_name].nlargest(num_outliers, field).index - df_no_outliers = df_no_outliers.drop(index=indices_to_drop) - return df_no_outliers - -def remove_outliers_by_standard_deviation(input_df, column): - '''Takes a dataframe and a numeric column name. - Then removes all rows that are more than 3 standard deviations from the mean. - Returns a df without outliers, the # of outliers removed, outlier cutoff value''' - - # finds rows that are more than 3 standard deviations from the mean - outlier_cutoff = input_df[column].mean() + (3 * input_df[column].std()) - outlier_mask = input_df[column] > outlier_cutoff - - # determine number of outliers - outliers_removed = len(input_df.loc[outlier_mask]) - - df_no_outliers = input_df.loc[~outlier_mask] - - return df_no_outliers, outliers_removed, outlier_cutoff - -def hex_to_RGB(hex): - ''' "#FFFFFF" -> [255,255,255] ''' - # Pass 16 to the integer function for change of base - return [int(hex[i:i + 2], 16) for i in range(1, 6, 2)] - -def color_dict(gradient): - ''' Takes in a list of RGB sub-lists and returns dictionary of - colors in RGB and hex form for use in a graphing function - defined later on ''' - return {"hex": [RGB_to_hex(RGB) for RGB in gradient], - "r": [RGB[0] for RGB in gradient], - "g": [RGB[1] for RGB in gradient], - "b": [RGB[2] for RGB in gradient]} - -def RGB_to_hex(RGB): - ''' [255,255,255] -> "#FFFFFF" ''' - # Components need to be integers for hex to make sense - RGB = [int(x) for x in RGB] - return "#" + "".join(["0{0:x}".format(v) if v < 16 else - "{0:x}".format(v) for v in RGB]) - -def linear_gradient(start_hex, finish_hex="#FFFFFF", n=10): - ''' returns a gradient list of (n) colors between - two hex colors.
start_hex and finish_hex - should be the full six-digit color string, - including the number sign ("#FFFFFF") ''' - # Starting and ending colors in RGB form - s = hex_to_RGB(start_hex) - f = hex_to_RGB(finish_hex) - # Initialize a list of the output colors with the starting color - RGB_list = [s] - # Calculate a color at each evenly spaced value of t from 1 to n - for t in range(1, n): - # Interpolate RGB vector for color at the current value of t - curr_vector = [ - int(s[j] + (float(t) / (n - 1)) * (f[j] - s[j])) - for j in range(3) - ] - # Add it to our list of output colors - RGB_list.append(curr_vector) - - return color_dict(RGB_list) - -# dict of df types, and their location in the tuple that the function pull_request_data_collection returns -def get_df_tuple_locations(): - return {"pr_all": 0, "pr_open": 1, "pr_closed": 2, "pr_merged": 3, "pr_not_merged": 4, "pr_slow20_all": 5, - "pr_slow20_open": 6, "pr_slow20_closed": 7, "pr_slow20_merged": 8, "pr_slow20_not_merged": 9} - -def add_caption_to_plot(caption_plot, caption): - - caption_plot.add_layout(Label( - x=0, # Change to shift caption left or right - y=160, - x_units='screen', - y_units='screen', - text='{}'.format(caption), - text_font='times', # Use same font as paper - text_font_size='15pt', - render_mode='css' - )) - caption_plot.outline_line_color = None - - return caption_plot - -def remove_rows_with_null_values(df, not_null_columns=[]): - """Remove null data from pandas df - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- list_of_columns - description: columns that are searched for NULL values - type: list - default: [] (means all columns will be checked for NULL values) - IMPORTANT: if an empty list is passed or nothing is passed it will check all columns for NULL values - - Return Value - -- Modified Pandas Dataframe - """ - - if len(not_null_columns) == 0: - not_null_columns = df.columns.to_list() - - total_rows_removed = 0 - for col in not_null_columns: - rows_removed = len(df.loc[df[col].isnull()]) - #rows_removed = len(df.loc[df[col].isnull() is True]) - - if rows_removed > 0: - print(f"{rows_removed} rows have been removed because of null values in column {col}") - total_rows_removed += rows_removed - - df = df.loc[df[col].isnull() is False] - - if total_rows_removed > 0: - print(f"\nTotal rows removed because of null data: {total_rows_removed}"); - else: - print("No null data found") - - return df - -def get_needed_columns(df, list_of_columns): - """Get only a specific list of columns from a Pandas Dataframe - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- list_of_columns - description: columns that will be kept in dataframe - type: list - - Return Value - -- Modified Pandas Dataframe - """ - return df[list_of_columns] - -def filter_data(df, needed_columns, not_null_columns=[]): - """Filters out the unneeded rows in the df, and removes NULL data from df - - Parameters - -- df - description: the dataframe that will be modified - type: Pandas Dataframe - - -- needed_columns - description: the columns to keep in the dataframe - - -- not_null_columns - description: columns that will be searched for NULL data, - if NULL values are found those rows will be removed - default: [] (means all columns in needed_columns list will be checked for NULL values) - IMPORTANT: if an empty list is passed or nothing is passed it will check - all columns in needed_columns list for NULL values - Return Value - -- Modified
Pandas Dataframe - """ - - if all(x in needed_columns for x in not_null_columns): - - df = get_needed_columns(df, needed_columns) - #Use the pandas method bc the other method was erroring on boolean index. - #IM - 9/23/22 - df = df.dropna(subset=not_null_columns)#remove_rows_with_null_values(df, not_null_columns) - - return df - else: - print("Developer error, not null columns should be a subset of needed columns") - return df - -def get_repo_id_start_date_and_end_date(): - - """ Gets the repo_id, start_date, and end_date from the GET requests array - - :return: repo_id - id of the repo data is being retrieved for - :return: start_date - earliest time on visualization. Defaults to the January 1st of last year - :return: end_date - latest time on visualization. Defaults to current date - """ - - now = datetime.datetime.now() - - repo_id = request.args.get('repo_id') - start_date = str(request.args.get('start_date', "{}-01-01".format(now.year - 1))) - end_date = str(request.args.get('end_date', "{}-{}-{}".format(now.year, now.month, now.day))) - - if repo_id: - - if start_date < end_date: - return int(repo_id), start_date, end_date, None - else: - - error = { - "message": "Invalid end_date. end_date is before the start_date", - "status_code": 400 - } - - return int(repo_id), None, None, error - - else: - error = { - "message": "repo_id not specified. Use this endpoint to get a list of available repos: http:///api/unstable/repos", - "status_code": 400 - } - return None, None, None, error - -@app.route('/{}/pull_request_reports/average_commits_per_PR/'.format(AUGUR_API_VERSION), methods=["GET"]) -def average_commits_per_PR(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by = str(request.args.get('group_by', "month")) - return_json = request.args.get('return_json', "false") - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - y_axis = 'num_commits' - group_by_bars = 'merged_flag' - description = 'All' - - # gets pr_all data - # selects only need columns (pr_closed_needed_columns) - # removes columns that cannot be NULL (pr_closed_not_null_columns) - input_df = df_tuple[df_type["pr_all"]] - needed_columns = ['repo_id', 'repo_name', 'closed_year', 'closed_yearmonth', group_by_bars, 'commit_count'] - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - # print(input_df.to_string()) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - driver_df = input_df.copy() # deep copy input data so we do not change the external dataframe - - # Change closed year to int so that doesn't display as 2019.0 for example - driver_df['closed_year'] = driver_df['closed_year'].astype(int).astype(str) - - # defaults to year - x_axis = 'closed_year' - x_groups = sorted(list(driver_df[x_axis].unique())) - - if group_by == 'month': - x_axis = "closed_yearmonth" - x_groups = np.unique(np.datetime_as_string(input_df[x_axis], unit='M')) - - # inner groups on x_axis they are merged and not_merged - groups = list(driver_df[group_by_bars].unique()) - - # setup color pallete - try: - colors = mpl['Plasma'][len(groups)] - except: - colors = [mpl['Plasma'][3][0]] + 
[mpl['Plasma'][3][1]] - - merged_avg_values = list(driver_df.loc[driver_df[group_by_bars] == 'Merged / Accepted'].groupby([x_axis], - as_index=False).mean().round( - 1)['commit_count']) - not_merged_avg_values = list( - driver_df.loc[driver_df[group_by_bars] == 'Not Merged / Rejected'].groupby([x_axis], - as_index=False).mean().round(1)[ - 'commit_count']) - - # Setup data in format for grouped bar chart - data = { - 'years': x_groups, - 'Merged / Accepted': merged_avg_values, - 'Not Merged / Rejected': not_merged_avg_values, - } - - x = [(year, pr_state) for year in x_groups for pr_state in groups] - counts = sum(zip(data['Merged / Accepted'], data['Not Merged / Rejected']), ()) - - source = ColumnDataSource(data=dict(x=x, counts=counts)) - - title_beginning = '{}: '.format(repo_dict[repo_id]) - title = "{}Average Commit Counts Per Year for {} Pull Requests".format(title_beginning, description) - - plot_width = len(x_groups) * 300 - title_text_font_size = 16 - - if (len(title) * title_text_font_size / 2) > plot_width: - plot_width = int(len(title) * title_text_font_size / 2) + 40 - - p = figure(x_range=FactorRange(*x), plot_height=450, plot_width=plot_width, title=title, - y_range=(0, max(merged_avg_values + not_merged_avg_values) * 1.15), toolbar_location=None) - - # Vertical bar glyph - p.vbar(x='x', top='counts', width=0.9, source=source, line_color="white", - fill_color=factor_cmap('x', palette=colors, factors=groups, start=1, end=2)) - - # Data label - labels = LabelSet(x='x', y='counts', text='counts', # y_offset=-8, x_offset=34, - text_font_size="12pt", text_color="black", - source=source, text_align='center') - p.add_layout(labels) - - p.y_range.start = 0 - p.x_range.range_padding = 0.1 - p.xaxis.major_label_orientation = 1 - p.xgrid.grid_line_color = None - - p.yaxis.axis_label = 'Average Commits / Pull Request' - p.xaxis.axis_label = 'Year Closed' - - p.title.align = "center" - p.title.text_font_size = "{}px".format(title_text_font_size) - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "15px" - - p.yaxis.axis_label_text_font_size = "15px" - p.yaxis.major_label_text_font_size = "15px" - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average commits per pull requests over an entire year," \ - " for merged and not merged pull requests." 
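Aside: the grouped-bar construction above, nested (year, state) factors rendered through FactorRange and colored with factor_cmap, is a generic Bokeh pattern rather than anything Augur-specific. A minimal self-contained sketch, with invented averages and Bokeh 2.x-style sizing keywords assumed, looks like this:

    # Sketch of the nested-factor grouped-bar pattern; values are hypothetical.
    from bokeh.models import ColumnDataSource, FactorRange
    from bokeh.plotting import figure
    from bokeh.transform import factor_cmap

    years = ['2019', '2020']
    states = ['Merged / Accepted', 'Not Merged / Rejected']
    x = [(year, state) for year in years for state in states]
    counts = [3.2, 2.1, 4.0, 2.8]  # one bar per (year, state) pair

    source = ColumnDataSource(data=dict(x=x, counts=counts))
    p = figure(x_range=FactorRange(*x), plot_height=450, plot_width=600,
               toolbar_location=None, title='Average Commits per PR')
    # start=1, end=2 tells factor_cmap to color by the second factor (the state)
    p.vbar(x='x', top='counts', width=0.9, source=source, line_color='white',
           fill_color=factor_cmap('x', palette=['#440154', '#fde724'],
                                  factors=states, start=1, end=2))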
- p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "average_commits_per_PR")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - # filename = export_png(grid, timeout=180, webdriver=webdriver) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/average_comments_per_PR/'.format(AUGUR_API_VERSION), methods=["GET"]) -def average_comments_per_PR(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - group_by = 'merged_flag' - x_axis = 'comment_count' - description = "All Closed" - y_axis = 'closed_year' - - # gets pr_closed data - # selects only need columns (pr_closed_needed_columns) - # removes columns that cannot be NULL (pr_closed_not_null_columns) - input_df = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', 'repo_name', y_axis, group_by, x_axis] - not_null_columns = needed_columns - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - driver_df = input_df.copy() - - try: - y_groups = sorted(list(driver_df[y_axis].unique())) - except: - y_groups = [repo_id] - - groups = driver_df[group_by].unique() - try: - colors = mpl['Plasma'][len(groups)] - except: - colors = [mpl['Plasma'][3][0]] + [mpl['Plasma'][3][1]] - - len_not_merged = len(driver_df.loc[driver_df['merged_flag'] == 'Not Merged / Rejected']) - len_merged = len(driver_df.loc[driver_df['merged_flag'] == 'Merged / Accepted']) - - title_beginning = '{}: '.format(repo_dict[repo_id]) - plot_width = 650 - p = figure(y_range=y_groups, plot_height=450, plot_width=plot_width, - # y_range=y_groups,#(pr_all[y_axis].min(),pr_all[y_axis].max()) #y_axis_type="datetime", - title='{} {}'.format(title_beginning, "Mean Comments for {} Pull Requests".format(description)), - toolbar_location=None) - - possible_maximums = [] - for y_value in y_groups: - - y_merged_data = driver_df.loc[ - (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Merged / Accepted')] - y_not_merged_data = driver_df.loc[ - (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Not Merged / Rejected')] - - if len(y_merged_data) > 0: - y_merged_data_mean = y_merged_data[x_axis].mean() - - if (math.isnan(y_merged_data_mean)): - return Response( - response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', status=200) - else: - y_merged_data[x_axis + '_mean'] = y_merged_data_mean.round(1) - - else: - y_merged_data[x_axis + '_mean'] = 0 - - if len(y_not_merged_data) > 0: - y_not_merged_data_mean = y_not_merged_data[x_axis].mean() - - if math.isnan(y_not_merged_data_mean): - return Response( - response="There is no 
message data for this repo, in the database you are accessing", - mimetype='application/json', status=200) - else: - y_not_merged_data[x_axis + '_mean'] = y_not_merged_data_mean.round(1) - - else: - y_not_merged_data[x_axis + '_mean'] = 0 - - not_merged_source = ColumnDataSource(y_not_merged_data) - merged_source = ColumnDataSource(y_merged_data) - - possible_maximums.append(max(y_not_merged_data[x_axis + '_mean'])) - possible_maximums.append(max(y_merged_data[x_axis + '_mean'])) - - # mean comment count for merged - merged_comment_count_glyph = p.hbar(y=dodge(y_axis, -0.1, range=p.y_range), left=0, right=x_axis + '_mean', - height=0.04 * len(driver_df[y_axis].unique()), - source=merged_source, - fill_color="black") # ,legend_label="Mean Days to Close", - # Data label - labels = LabelSet(x=x_axis + '_mean', y=dodge(y_axis, -0.1, range=p.y_range), text=x_axis + '_mean', - y_offset=-8, x_offset=34, - text_font_size="12pt", text_color="black", - source=merged_source, text_align='center') - p.add_layout(labels) - # mean comment count For nonmerged - not_merged_comment_count_glyph = p.hbar(y=dodge(y_axis, 0.1, range=p.y_range), left=0, - right=x_axis + '_mean', - height=0.04 * len(driver_df[y_axis].unique()), - source=not_merged_source, - fill_color="#e84d60") # legend_label="Mean Days to Close", - # Data label - labels = LabelSet(x=x_axis + '_mean', y=dodge(y_axis, 0.1, range=p.y_range), text=x_axis + '_mean', - y_offset=-8, x_offset=34, - text_font_size="12pt", text_color="#e84d60", - source=not_merged_source, text_align='center') - p.add_layout(labels) - - # p.y_range.range_padding = 0.1 - p.ygrid.grid_line_color = None - p.legend.location = "bottom_right" - p.axis.minor_tick_line_color = None - p.outline_line_color = None - p.xaxis.axis_label = 'Average Comments / Pull Request' - p.yaxis.axis_label = 'Repository' if y_axis == 'repo_name' else 'Year Closed' if y_axis == 'closed_year' else '' - - legend = Legend( - items=[ - ("Merged Pull Request Mean Comment Count", [merged_comment_count_glyph]), - ("Rejected Pull Request Mean Comment Count", [not_merged_comment_count_glyph]) - ], - - location='center', - orientation='vertical', - border_line_color="black" - ) - p.add_layout(legend, "below") - - p.title.text_font_size = "16px" - p.title.align = "center" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - - p.x_range = Range1d(0, max(possible_maximums) * 1.15) - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average number of comments per merged or not merged pull request." 
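The paired horizontal bars above depend on bokeh.transform.dodge to offset the merged and not-merged series around each categorical tick. A stripped-down sketch of just that mechanism, with invented means:

    from bokeh.models import ColumnDataSource
    from bokeh.plotting import figure
    from bokeh.transform import dodge

    years = ['2019', '2020']
    merged = ColumnDataSource(dict(year=years, mean=[5.1, 6.3]))
    not_merged = ColumnDataSource(dict(year=years, mean=[2.4, 3.0]))

    p = figure(y_range=years, plot_height=300, plot_width=600,
               toolbar_location=None)
    # offsets of -0.1 and +0.1 place the two bars on either side of each tick
    p.hbar(y=dodge('year', -0.1, range=p.y_range), left=0, right='mean',
           height=0.15, source=merged, fill_color='black')
    p.hbar(y=dodge('year', 0.1, range=p.y_range), left=0, right='mean',
           height=0.15, source=not_merged, fill_color='#e84d60')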
- - p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "average_comments_per_PR")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/PR_counts_by_merged_status/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def PR_counts_by_merged_status(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - - x_axis = 'closed_year' - description = 'All Closed' - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - # gets pr_closed data - # selects only need columns (pr_closed_needed_columns) - # removes columns that cannot be NULL (pr_closed_not_null_columns) - pr_closed = df_tuple[df_type["pr_closed"]] - pr_closed_needed_columns = ['repo_id', 'repo_name', x_axis, 'merged_flag'] - pr_closed = filter_data(pr_closed, pr_closed_needed_columns) - - # gets pr_slow20_not_merged data - # selects only need columns (pr_slow20_not_merged_needed_columns) - # removes columns that cannot be NULL (pr_slow20_not_merged_not_null_columns) - pr_slow20_not_merged = df_tuple[df_type["pr_slow20_not_merged"]] - pr_slow20_not_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'merged_flag'] - pr_slow20_not_merged = filter_data(pr_slow20_not_merged, pr_slow20_not_merged_needed_columns,) - - # gets pr_slow20_merged data - # selects only need columns (pr_slow20_not_merged_needed_columns) - # removes columns that cannot be NULL (pr_slow20_not_merged_not_null_columns) - pr_slow20_merged = df_tuple[df_type["pr_slow20_merged"]] - pr_slow20_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'merged_flag'] - pr_slow20_merged = filter_data(pr_slow20_merged, pr_slow20_merged_needed_columns) - - if len(pr_closed) == 0 or len(pr_slow20_not_merged) == 0 or len(pr_slow20_merged) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']} - - data_dict = {'All': pr_closed, 'Slowest 20%': pr_slow20_not_merged.append(pr_slow20_merged, ignore_index=True)} - - colors = mpl['Plasma'][6] - - for data_desc, input_df in data_dict.items(): - x_groups = sorted(list(input_df[x_axis].astype(str).unique())) - break - - plot_width = 315 * len(x_groups) - - if plot_width < 900: - plot_width = 900 - title_beginning = repo_dict[repo_id] - p = figure(x_range=x_groups, plot_height=350, plot_width=plot_width, - title='{}: {}'.format(title_beginning, - "Count of {} Pull Requests by Merged Status".format(description)), - toolbar_location=None) - - dodge_amount = 0.12 - color_index = 0 - x_offset = 60 - - all_totals = [] - for data_desc, input_df in data_dict.items(): - driver_df = input_df.copy() - - driver_df[x_axis] = driver_df[x_axis].astype(str) - - groups = sorted(list(driver_df['merged_flag'].unique())) - - driver_df = 
driver_df.loc[driver_df['repo_id'] == repo_id] - - len_merged = [] - zeros = [] - len_not_merged = [] - totals = [] - - for x_group in x_groups: - len_merged_entry = len( - driver_df.loc[(driver_df['merged_flag'] == 'Merged / Accepted') & (driver_df[x_axis] == x_group)]) - totals += [len(driver_df.loc[(driver_df['merged_flag'] == 'Not Merged / Rejected') & ( - driver_df[x_axis] == x_group)]) + len_merged_entry] - len_not_merged += [len(driver_df.loc[(driver_df['merged_flag'] == 'Not Merged / Rejected') & ( - driver_df[x_axis] == x_group)])] - len_merged += [len_merged_entry] - zeros.append(0) - - data = {'X': x_groups} - for group in groups: - data[group] = [] - for x_group in x_groups: - data[group] += [ - len(driver_df.loc[(driver_df['merged_flag'] == group) & (driver_df[x_axis] == x_group)])] - - data['len_merged'] = len_merged - data['len_not_merged'] = len_not_merged - data['totals'] = totals - data['zeros'] = zeros - - if data_desc == "All": - all_totals = totals - - source = ColumnDataSource(data) - - stacked_bar = p.vbar_stack(groups, x=dodge('X', dodge_amount, range=p.x_range), width=0.2, source=source, - color=colors[1:3], legend_label=[f"{data_desc} " + "%s" % x for x in groups]) - # Data label for merged - - p.add_layout( - LabelSet(x=dodge('X', dodge_amount, range=p.x_range), y='zeros', text='len_merged', y_offset=2, - x_offset=x_offset, - text_font_size="12pt", text_color=colors[1:3][0], - source=source, text_align='center') - ) - if min(data['totals']) < 400: - y_offset = 15 - else: - y_offset = 0 - # Data label for not merged - p.add_layout( - LabelSet(x=dodge('X', dodge_amount, range=p.x_range), y='totals', text='len_not_merged', - y_offset=y_offset, x_offset=x_offset, - text_font_size="12pt", text_color=colors[1:3][1], - source=source, text_align='center') - ) - # Data label for total - p.add_layout( - LabelSet(x=dodge('X', dodge_amount, range=p.x_range), y='totals', text='totals', y_offset=0, x_offset=0, - text_font_size="12pt", text_color='black', - source=source, text_align='center') - ) - dodge_amount *= -1 - colors = colors[::-1] - x_offset *= -1 - - p.y_range = Range1d(0, max(all_totals) * 1.4) - - p.xgrid.grid_line_color = None - p.legend.location = "top_center" - p.legend.orientation = "horizontal" - p.axis.minor_tick_line_color = None - p.outline_line_color = None - p.yaxis.axis_label = 'Count of Pull Requests' - p.xaxis.axis_label = 'Repository' if x_axis == 'repo_name' else 'Year Closed' if x_axis == 'closed_year' else '' - - p.title.align = "center" - p.title.text_font_size = "16px" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - - p.outline_line_color = None - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the number of closed pull requests per year in " \ - "four different categories. These four categories are All Merged, All Not Merged," \ - " Slowest 20% Merged, and Slowest 20% Not Merged." 
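Note that pr_slow20_not_merged.append(pr_slow20_merged, ignore_index=True) above relies on DataFrame.append, which was deprecated in pandas 1.4 and removed in pandas 2.0; on current pandas the same concatenation would be written roughly as:

    import pandas as pd

    # pd.concat replaces the removed DataFrame.append; ignore_index=True
    # renumbers the combined rows just as append(..., ignore_index=True) did.
    slowest_20 = pd.concat([pr_slow20_not_merged, pr_slow20_merged],
                           ignore_index=True)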
- p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "PR_counts_by_merged_status")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/mean_response_times_for_PR/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def mean_response_times_for_PR(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - time_unit = 'days' - x_max = 95 - y_axis = 'closed_year' - description = "All Closed" - legend_position = (410, 10) - - # gets pr_closed data - # selects only need columns (pr_closed_needed_columns) - # removes columns that cannot be NULL (pr_closed_not_null_columns) - input_df = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', 'repo_name', y_axis, 'merged_flag', time_unit + '_to_first_response', - time_unit + '_to_last_response', time_unit + '_to_close'] - input_df = filter_data(input_df, needed_columns) - - if len(input_df) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: input_df.loc[input_df['repo_id'] == repo_id].iloc[0]['repo_name']} - - driver_df = input_df.copy() # deep copy input data so we do not alter the external dataframe - - title_beginning = '{}: '.format(repo_dict[repo_id]) - plot_width = 950 - p = figure(toolbar_location=None, y_range=sorted(driver_df[y_axis].unique()), plot_width=plot_width, - plot_height=450, # 75*len(driver_df[y_axis].unique()), - title="{}Mean Response Times for Pull Requests {}".format(title_beginning, description)) - - first_response_glyphs = [] - last_response_glyphs = [] - merged_days_to_close_glyphs = [] - not_merged_days_to_close_glyphs = [] - - possible_maximums = [] - - # FIXME repo_set is not defined - # setup color pallete - try: - colors = Colorblind[len(repo_set)] - except: - colors = Colorblind[3] - - y_merged_data_list = [] - y_not_merged_data_list = [] - - # calculate data frist time to obtain the maximum and make sure there is message data - for y_value in driver_df[y_axis].unique(): - - y_merged_data = driver_df.loc[ - (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Merged / Accepted')] - y_not_merged_data = driver_df.loc[ - (driver_df[y_axis] == y_value) & (driver_df['merged_flag'] == 'Not Merged / Rejected')] - - if len(y_merged_data) > 0: - - y_merged_data_first_response_mean = y_merged_data[time_unit + '_to_first_response'].mean() - y_merged_data_last_response_mean = y_merged_data[time_unit + '_to_last_response'].mean() - y_merged_data_to_close_mean = y_merged_data[time_unit + '_to_close'].mean() - - if (math.isnan(y_merged_data_first_response_mean) or math.isnan( - y_merged_data_last_response_mean) or math.isnan(y_merged_data_to_close_mean)): - return Response( - response="There is no message data for this repo, in the 
database you are accessing", - mimetype='application/json', status=200) - else: - y_merged_data[time_unit + '_to_first_response_mean'] = y_merged_data_first_response_mean.round(1) - y_merged_data[time_unit + '_to_last_response_mean'] = y_merged_data_last_response_mean.round(1) - y_merged_data[time_unit + '_to_close_mean'] = y_merged_data_to_close_mean.round(1) - else: - y_merged_data[time_unit + '_to_first_response_mean'] = 0.00 - y_merged_data[time_unit + '_to_last_response_mean'] = 0.00 - y_merged_data[time_unit + '_to_close_mean'] = 0.00 - - if len(y_not_merged_data) > 0: - - y_not_merged_data_first_response_mean = y_not_merged_data[time_unit + '_to_first_response'].mean() - y_not_merged_data_last_response_mean = y_not_merged_data[time_unit + '_to_last_response'].mean() - y_not_merged_data_to_close_mean = y_not_merged_data[time_unit + '_to_close'].mean() - - if (math.isnan(y_not_merged_data_first_response_mean) or math.isnan( - y_not_merged_data_last_response_mean) or math.isnan(y_not_merged_data_to_close_mean)): - return Response( - response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', status=200) - else: - y_not_merged_data[ - time_unit + '_to_first_response_mean'] = y_not_merged_data_first_response_mean.round(1) - y_not_merged_data[ - time_unit + '_to_last_response_mean'] = y_not_merged_data_last_response_mean.round(1) - y_not_merged_data[time_unit + '_to_close_mean'] = y_not_merged_data_to_close_mean.round(1) - else: - y_not_merged_data[time_unit + '_to_first_response_mean'] = 0.00 - y_not_merged_data[time_unit + '_to_last_response_mean'] = 0.00 - y_not_merged_data[time_unit + '_to_close_mean'] = 0.00 - - possible_maximums.append(max(y_merged_data[time_unit + '_to_close_mean'])) - possible_maximums.append(max(y_not_merged_data[time_unit + '_to_close_mean'])) - - maximum = max(possible_maximums) * 1.15 - ideal_difference = maximum * 0.064 - - y_merged_data_list.append(y_merged_data) - y_not_merged_data_list.append(y_not_merged_data) - - # loop through data and add it to the plot - for index in range(0, len(y_merged_data_list)): - - y_merged_data = y_merged_data_list[index] - y_not_merged_data = y_not_merged_data_list[index] - - not_merged_source = ColumnDataSource(y_not_merged_data) - merged_source = ColumnDataSource(y_merged_data) - - # mean PR length for merged - merged_days_to_close_glyph = p.hbar(y=dodge(y_axis, -0.1, range=p.y_range), left=0, - right=time_unit + '_to_close_mean', - height=0.04 * len(driver_df[y_axis].unique()), - source=merged_source, - fill_color="black") # ,legend_label="Mean Days to Close", - merged_days_to_close_glyphs.append(merged_days_to_close_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_close_mean', y=dodge(y_axis, -0.1, range=p.y_range), - text=time_unit + '_to_close_mean', y_offset=-8, x_offset=34, # 34 - text_font_size="12pt", text_color="black", - source=merged_source, text_align='center') - p.add_layout(labels) - - # mean PR length For nonmerged - not_merged_days_to_close_glyph = p.hbar(y=dodge(y_axis, 0.1, range=p.y_range), left=0, - right=time_unit + '_to_close_mean', - height=0.04 * len(driver_df[y_axis].unique()), - source=not_merged_source, - fill_color="#e84d60") # legend_label="Mean Days to Close", - not_merged_days_to_close_glyphs.append(not_merged_days_to_close_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_close_mean', y=dodge(y_axis, 0.1, range=p.y_range), - text=time_unit + '_to_close_mean', y_offset=-8, x_offset=44, - text_font_size="12pt", 
text_color="#e84d60", - source=not_merged_source, text_align='center') - p.add_layout(labels) - - # if the difference between two values is less than 6.4 percent move the second one to the right 30 pixels - if (max(y_merged_data[time_unit + '_to_last_response_mean']) - max( - y_merged_data[time_unit + '_to_first_response_mean'])) < ideal_difference: - merged_x_offset = 30 - else: - merged_x_offset = 0 - - # if the difference between two values is less than 6.4 percent move the second one to the right 30 pixels - if (max(y_not_merged_data[time_unit + '_to_last_response_mean']) - max( - y_not_merged_data[time_unit + '_to_first_response_mean'])) < ideal_difference: - not_merged_x_offset = 30 - else: - not_merged_x_offset = 0 - - # if there is only one bar set the y_offsets so the labels will not overlap the bars - if len(driver_df[y_axis].unique()) == 1: - merged_y_offset = -65 - not_merged_y_offset = 45 - else: - merged_y_offset = -45 - not_merged_y_offset = 25 - - # mean time to first response - glyph = Rect(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, -0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[0]) - first_response_glyph = p.add_glyph(merged_source, glyph) - first_response_glyphs.append(first_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_first_response_mean', x_offset=0, y_offset=merged_y_offset, # -60, - text_font_size="12pt", text_color=colors[0], - source=merged_source, text_align='center') - p.add_layout(labels) - - # for nonmerged - glyph = Rect(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, 0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[0]) - first_response_glyph = p.add_glyph(not_merged_source, glyph) - first_response_glyphs.append(first_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_first_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_first_response_mean', x_offset=0, y_offset=not_merged_y_offset, - # 40, - text_font_size="12pt", text_color=colors[0], - source=not_merged_source, text_align='center') - p.add_layout(labels) - - # mean time to last response - glyph = Rect(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, -0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[1]) - last_response_glyph = p.add_glyph(merged_source, glyph) - last_response_glyphs.append(last_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_last_response_mean', x_offset=merged_x_offset, - y_offset=merged_y_offset, # -60, - text_font_size="12pt", text_color=colors[1], - source=merged_source, text_align='center') - p.add_layout(labels) - - # for nonmerged - glyph = Rect(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, 0.1, range=p.y_range), - width=x_max / 100, height=0.08 * len(driver_df[y_axis].unique()), fill_color=colors[1]) - last_response_glyph = p.add_glyph(not_merged_source, glyph) - last_response_glyphs.append(last_response_glyph) - # Data label - labels = LabelSet(x=time_unit + '_to_last_response_mean', y=dodge(y_axis, 0, range=p.y_range), - text=time_unit + '_to_last_response_mean', x_offset=not_merged_x_offset, - y_offset=not_merged_y_offset, # 40, - text_font_size="12pt", text_color=colors[1], - source=not_merged_source, 
text_align='center') - p.add_layout(labels) - - p.title.align = "center" - p.title.text_font_size = "16px" - - p.xaxis.axis_label = "Days to Close" - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - # adjust the starting point and ending point based on the maximum of maximum of the graph - p.x_range = Range1d(maximum / 30 * -1, maximum * 1.15) - - p.yaxis.axis_label = "Repository" if y_axis == 'repo_name' else 'Year Closed' if y_axis == 'closed_year' else '' - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - p.ygrid.grid_line_color = None - p.y_range.range_padding = 0.15 - - p.outline_line_color = None - p.toolbar.logo = None - p.toolbar_location = None - - def add_legend(location, orientation, side): - legend = Legend( - items=[ - ("Mean Days to First Response", first_response_glyphs), - ("Mean Days to Last Response", last_response_glyphs), - ("Merged Mean Days to Close", merged_days_to_close_glyphs), - ("Not Merged Mean Days to Close", not_merged_days_to_close_glyphs) - ], - - location=location, - orientation=orientation, - border_line_color="black" - # title='Example Title' - ) - p.add_layout(legend, side) - - # add_legend((150, 50), "horizontal", "center") - add_legend((10, 135), "vertical", "right") - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average number of days between comments for all closed pull requests per month " \ - "in four categories. These four categories are All Merged, All Not Merged, Slowest 20% Merged, " \ - "and Slowest 20% Not Merged." - p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "mean_response_times_for_PR")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/mean_days_between_PR_comments/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def mean_days_between_PR_comments(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - - time_unit = 'Days' - x_axis = 'closed_yearmonth' - y_axis = 'average_days_between_responses' - description = "All Closed" - line_group = 'merged_flag' - num_outliers_repo_map = {} - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - # gets pr_closed data - # selects only need columns (pr_closed_needed_columns) - # removes columns that cannot be NULL (pr_closed_not_null_columns) - pr_closed = df_tuple[df_type["pr_closed"]] - pr_closed_needed_columns = ['repo_id', 'repo_name', x_axis, 'average_time_between_responses', line_group] - pr_closed = filter_data(pr_closed, pr_closed_needed_columns) - - # gets pr_slow20_not_merged data - # selects only need columns (pr_slow20_not_merged_needed_columns) - # removes columns that cannot be NULL (pr_slow20_not_merged_not_null_columns) - pr_slow20_not_merged = df_tuple[df_type["pr_slow20_not_merged"]] - 
pr_slow20_not_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'average_time_between_responses', line_group] - pr_slow20_not_merged = filter_data(pr_slow20_not_merged, pr_slow20_not_merged_needed_columns) - - # gets pr_slow20_merged data - # selects only need columns (pr_slow20_not_merged_needed_columns) - # removes columns that cannot be NULL (pr_slow20_not_merged_not_null_columns) - pr_slow20_merged = df_tuple[df_type["pr_slow20_merged"]] - pr_slow20_merged_needed_columns = ['repo_id', 'repo_name', x_axis, 'average_time_between_responses', line_group] - pr_slow20_merged = filter_data(pr_slow20_merged, pr_slow20_merged_needed_columns) - - if len(pr_closed) == 0 or len(pr_slow20_not_merged) == 0 or len(pr_slow20_merged) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - try: - pr_closed['average_days_between_responses'] = pr_closed['average_time_between_responses'].map( - lambda x: x.days).astype(float) - pr_slow20_not_merged['average_days_between_responses'] = pr_slow20_not_merged[ - 'average_time_between_responses'].map(lambda x: x.days).astype(float) - pr_slow20_merged['average_days_between_responses'] = pr_slow20_merged['average_time_between_responses'].map( - lambda x: x.days).astype(float) - except: - return Response(response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']} - - data_dict = {'All': pr_closed, 'Slowest 20%': pr_slow20_not_merged.append(pr_slow20_merged, ignore_index=True)} - - plot_width = 950 - p1 = figure(x_axis_type="datetime", - title="{}: Mean {} Between Comments by Month Closed for {} Pull Requests".format(repo_dict[repo_id], time_unit, description), - plot_width=plot_width, x_range=(data_dict["All"][x_axis].min(), data_dict["All"][x_axis].max()), plot_height=500, - toolbar_location=None) - colors = Category20[10][6:] - color_index = 0 - - glyphs = [] - - possible_maximums = [] - for data_desc, input_df in data_dict.items(): - - driver_df = input_df.copy() - - driver_df = remove_outliers(driver_df, y_axis, num_outliers_repo_map) - - driver_df = driver_df.loc[driver_df['repo_id'] == repo_id] - index = 0 - - driver_df_mean = driver_df.groupby(['repo_id', line_group, x_axis], as_index=False).mean() - - title_ending = '' - if repo_id: - title_ending += ' for Repo: {}'.format(repo_id) - - for group_num, line_group_value in enumerate(driver_df[line_group].unique(), color_index): - glyphs.append(p1.line(driver_df_mean.loc[driver_df_mean[line_group] == line_group_value][x_axis], - driver_df_mean.loc[driver_df_mean[line_group] == line_group_value][y_axis], - color=colors[group_num], line_width=3)) - color_index += 1 - possible_maximums.append( - max(driver_df_mean.loc[driver_df_mean[line_group] == line_group_value][y_axis].dropna())) - for repo, num_outliers in num_outliers_repo_map.items(): - p1.add_layout( - Title(text="** {} outliers for {} were removed".format(num_outliers, repo), align="center"), - "below") - - p1.grid.grid_line_alpha = 0.3 - p1.xaxis.axis_label = 'Month Closed' - p1.xaxis.ticker.desired_num_ticks = 15 - p1.yaxis.axis_label = 'Mean {} Between Responses'.format(time_unit) - p1.legend.location = "top_left" - - legend = Legend( - items=[ - ("All Not Merged / Rejected", [glyphs[0]]), - ("All Merged / Accepted", [glyphs[1]]), - ("Slowest 20% Not Merged / Rejected", [glyphs[2]]), - 
("Slowest 20% Merged / Accepted", [glyphs[3]]) - ], - - location='center_right', - orientation='vertical', - border_line_color="black" - ) - - p1.add_layout(legend, 'right') - - p1.title.text_font_size = "16px" - - p1.xaxis.axis_label_text_font_size = "16px" - p1.xaxis.major_label_text_font_size = "16px" - - p1.yaxis.axis_label_text_font_size = "16px" - p1.yaxis.major_label_text_font_size = "16px" - p1.xaxis.major_label_orientation = 45.0 - - p1.y_range = Range1d(0, max(possible_maximums) * 1.15) - - plot = p1 - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average number of days between comments for all" \ - " closed pull requests per month in four categories. These four categories" \ - " are All Merged, All Not Merged, Slowest 20% Merged, and Slowest 20% Not Merged." - p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "mean_days_between_PR_comments")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/PR_time_to_first_response/'.format(AUGUR_API_VERSION), methods=["GET"]) -def PR_time_to_first_response(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - remove_outliers = str(request.args.get('remove_outliers', "true")) - - x_axis = 'pr_closed_at' - y_axis = 'days_to_first_response' - description = 'All' - group_by = 'merged_flag' - legend_position = 'top_right' - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - pr_closed = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', 'repo_name', x_axis, group_by, y_axis] - pr_closed = filter_data(pr_closed, needed_columns) - - if len(pr_closed) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']} - - driver_df = pr_closed.copy() - - outliers_removed = 0 - - if remove_outliers == "true": - driver_df, outliers_removed, outlier_cutoff = remove_outliers_by_standard_deviation(driver_df, 'days_to_first_response') - - group_by_groups = sorted(driver_df[group_by].unique()) - - # setup color pallete - try: - # FIXME repo_set is not defined - colors = Colorblind[len(repo_set)] - except: - colors = Colorblind[3] - - title_beginning = '{}: '.format(repo_dict[repo_id]) - plot_width = 180 * 5 - p = figure(x_range=( - driver_df[x_axis].min() - datetime.timedelta(days=30), driver_df[x_axis].max() + datetime.timedelta(days=25)), - # (driver_df[y_axis].min(), driver_df[y_axis].max()), - toolbar_location=None, - title='{}Days to First Response for {} Closed Pull Requests'.format(title_beginning, description), - plot_width=plot_width, - plot_height=400, x_axis_type='datetime') - - for index, group_by_group in enumerate(group_by_groups): - p.scatter(x_axis, y_axis, color=colors[index], 
marker="square", - source=driver_df.loc[driver_df[group_by] == group_by_group], legend_label=group_by_group) - - if group_by_group == "Merged / Accepted": - merged_values = driver_df.loc[driver_df[group_by] == group_by_group][y_axis].dropna().values.tolist() - else: - not_merged_values = driver_df.loc[driver_df[group_by] == group_by_group][ - y_axis].dropna().values.tolist() - - values = not_merged_values + merged_values - - if outliers_removed > 0: - if repo_id: - p.add_layout(Title( - text="** Outliers cut off at {} days: {} outlier(s) for {} were removed **".format(outlier_cutoff, - outliers_removed, - repo_dict[ - repo_id]), - align="center"), "below") - else: - p.add_layout(Title( - text="** Outliers cut off at {} days: {} outlier(s) were removed **".format(outlier_cutoff, - outliers_removed), - align="center"), "below") - - p.xaxis.axis_label = 'Date Closed' if x_axis == 'pr_closed_at' else 'Date Created' if x_axis == 'pr_created_at' else 'Date' - p.yaxis.axis_label = 'Days to First Response' - p.legend.location = legend_position - - p.title.align = "center" - p.title.text_font_size = "16px" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - - if len(values) == 0: - return Response(response="There is no message data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - # determine y_max by finding the max of the values and scaling it up a small amoutn - y_max = max(values) * 1.015 - - p.y_range = Range1d(0, y_max) - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the days to first reponse for individual pull requests, either Merged or Not Merged." 
- p = add_caption_to_plot(p, caption) - - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "PR_time_to_first_response")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(grid, timeout=180) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/average_PR_events_for_closed_PRs/'.format(AUGUR_API_VERSION), - methods=["GET"]) -def average_PR_events_for_closed_PRs(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - return_json = request.args.get('return_json', "false") - include_comments = str(request.args.get('include_comments', True)) - - x_axis = 'closed_year' - facet = 'merged_flag' - columns = 2 - x_max = 1100 - y_axis = 'repo_name' - description = 'All Closed' - optional_comments = ['comment_count'] if include_comments else [] - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - pr_closed = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', 'repo_name', x_axis, 'assigned_count', - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'mentioned_count', - 'referenced_count', - 'closed_count', - 'head_ref_force_pushed_count', - 'merged_count', - 'milestoned_count', - 'unlabeled_count', - 'head_ref_deleted_count', facet] + optional_comments - pr_closed = filter_data(pr_closed, needed_columns) - - if len(pr_closed) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - repo_dict = {repo_id: pr_closed.loc[pr_closed['repo_id'] == repo_id].iloc[0]['repo_name']} - - colors = linear_gradient('#f5f5dc', '#fff44f', 150)['hex'] - - driver_df = pr_closed.copy() - driver_df[x_axis] = driver_df[x_axis].astype(str) - - if facet == 'closed_year' or y_axis == 'closed_year': - driver_df['closed_year'] = driver_df['closed_year'].astype(int).astype(str) - - y_groups = [ - 'review_requested_count', - 'labeled_count', - 'subscribed_count', - 'referenced_count', - 'closed_count', - # 'milestoned_count', - ] + optional_comments - - optional_group_comments = ['comment'] if include_comments else [] - # y_groups = ['subscribed', 'mentioned', 'labeled', 'review_requested', 'head_ref_force_pushed', - # 'referenced', 'closed', 'merged', 'unlabeled', 'head_ref_deleted', 'milestoned', 'assigned'] - # + optional_group_comments - - x_groups = sorted(list(driver_df[x_axis].unique())) - - grid_array = [] - grid_row = [] - - for index, facet_group in enumerate(sorted(driver_df[facet].unique())): - - facet_data = driver_df.loc[driver_df[facet] == facet_group] - # display(facet_data.sort_values('merged_count', ascending=False).head(50)) - driver_df_mean = facet_data.groupby(['repo_id', 'repo_name', x_axis], as_index=False).mean().round(1) - - # if a record is field in a record is Nan then it is not counted by count() so when it is not - # 2 meaning both rows have a value, there is not enough data - if (driver_df_mean['assigned_count'].count() != 2 or driver_df_mean[ - 'review_requested_count'].count() != 2 or driver_df_mean['labeled_count'].count() 
!= 2 or - driver_df_mean['subscribed_count'].count() != 2 or driver_df_mean['mentioned_count'].count() != 2 or - driver_df_mean['referenced_count'].count() != 2 or - driver_df_mean['closed_count'].count() != 2 or driver_df_mean[ - 'head_ref_force_pushed_count'].count() != 2 or driver_df_mean['merged_count'].count() != 2 or - driver_df_mean['milestoned_count'].count() != 2 or driver_df_mean['unlabeled_count'].count() != 2 or - driver_df_mean['head_ref_deleted_count'].count() != 2 or - driver_df_mean['comment_count'].count() != 2): - return Response(response="There is not enough data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - # print(driver_df_mean.to_string()) - # data = {'Y' : y_groups} - # for group in y_groups: - # data[group] = driver_df_mean[group].tolist() - plot_width = 700 - p = figure(y_range=y_groups, plot_height=500, plot_width=plot_width, x_range=x_groups, - title='{}'.format(format(facet_group))) - - for y_group in y_groups: - driver_df_mean['field'] = y_group - source = ColumnDataSource(driver_df_mean) - mapper = LinearColorMapper(palette=colors, low=driver_df_mean[y_group].min(), - high=driver_df_mean[y_group].max()) - - p.rect(y='field', x=x_axis, width=1, height=1, source=source, - line_color=None, fill_color=transform(y_group, mapper)) - # Data label - labels = LabelSet(x=x_axis, y='field', text=y_group, y_offset=-8, - text_font_size="12pt", text_color='black', - source=source, text_align='center') - p.add_layout(labels) - - color_bar = ColorBar(color_mapper=mapper, location=(0, 0), - ticker=BasicTicker(desired_num_ticks=9), - formatter=PrintfTickFormatter(format="%d")) - # p.add_layout(color_bar, 'right') - - p.y_range.range_padding = 0.1 - p.ygrid.grid_line_color = None - - p.legend.location = "bottom_right" - p.axis.minor_tick_line_color = None - p.outline_line_color = None - - p.xaxis.axis_label = 'Year Closed' - p.yaxis.axis_label = 'Event Type' - - p.title.align = "center" - p.title.text_font_size = "15px" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "16px" - - p.yaxis.axis_label_text_font_size = "16px" - p.yaxis.major_label_text_font_size = "16px" - - grid_row.append(p) - if index % columns == columns - 1: - grid_array.append(grid_row) - grid_row = [] - grid = gridplot(grid_array) - - # create caption plot - caption_plot = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average count of several different event types for " \ - "closed pull requests per year. It spilits the pull requests into two categories, " \ - "Merged / Accepted, and Not Merged / Rejected, so the similarities and differences are clear." 
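The event heatmap above is the standard Bokeh rect-plus-LinearColorMapper recipe: one rect per (year, event) cell, filled through a transform of the numeric column. A compact sketch with made-up counts:

    from bokeh.models import ColumnDataSource, LinearColorMapper
    from bokeh.plotting import figure
    from bokeh.transform import transform

    data = dict(year=['2019', '2020', '2019', '2020'],
                event=['labeled_count'] * 2 + ['closed_count'] * 2,
                value=[1.5, 2.0, 1.0, 1.2])
    source = ColumnDataSource(data)
    mapper = LinearColorMapper(palette=['#f5f5dc', '#fff44f'], low=1.0, high=2.0)

    p = figure(x_range=['2019', '2020'], y_range=['labeled_count', 'closed_count'],
               plot_height=300, plot_width=400, toolbar_location=None)
    # each cell's fill color is mapped from its numeric value at render time
    p.rect(x='year', y='event', width=1, height=1, source=source,
           line_color=None, fill_color=transform('value', mapper))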
- - caption_plot.add_layout(Label(x=0, y=380, x_units='screen', y_units='screen', text='{}'.format(caption), - text_font='times', text_font_size='15pt', render_mode='css')) - - # caption_plot.outline_line_color = None - caption_plot.toolbar_location = None - - # create title plot - title_plot = figure(width=plot_width, height=50, margin=(0, 0, 0, 0)) - title = '{}: Average Pull Request Event Types for {} Pull Requests'.format(repo_dict[repo_id], description) - - title_plot.add_layout(Label(x=550, y=0, x_units='screen', y_units='screen', text='{}'.format(title), - text_font='times', text_font_size='17px', - text_font_style='bold', render_mode='css')) - - # title_plot.outline_line_color = None - title_plot.toolbar_location = None - - layout = column([title_plot, grid, caption_plot], sizing_mode='scale_width') - - if return_json == "true": - var = Response(response=json.dumps(json_item(layout, "average_PR_events_for_closed_PRs")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - filename = export_png(layout, timeout=181) # , webdriver=selenium.webdriver.firefox.webdriver) - - return send_file(filename) - -@app.route('/{}/pull_request_reports/Average_PR_duration/'.format(AUGUR_API_VERSION), methods=["GET"]) -def Average_PR_duration(): - - repo_id, start_date, end_date, error = get_repo_id_start_date_and_end_date() - - if error: - return Response(response=error["message"], - mimetype='application/json', - status=error["status_code"]) - - group_by = str(request.args.get('group_by', "month")) - return_json = request.args.get('return_json', "false") - remove_outliers = str(request.args.get('remove_outliers', "true")) - - x_axis = 'repo_name' - group_by = 'merged_flag' - y_axis = 'closed_yearmonth' - description = "All Closed" - heat_field = 'pr_duration_days' - columns = 2 - - df_type = get_df_tuple_locations() - - df_tuple = pull_request_data_collection(repo_id=repo_id, start_date=start_date, end_date=end_date) - - pr_closed = df_tuple[df_type["pr_closed"]] - needed_columns = ['repo_id', y_axis, group_by, x_axis, 'pr_closed_at', 'pr_created_at'] - pr_closed = filter_data(pr_closed, needed_columns) - - if len(pr_closed) == 0: - return Response(response="There is no data for this repo, in the database you are accessing", - mimetype='application/json', - status=200) - - pr_duration_frame = pr_closed.assign(pr_duration=(pr_closed['pr_closed_at'] - pr_closed['pr_created_at'])) - pr_duration_frame = pr_duration_frame.assign( - pr_duration_days=(pr_duration_frame['pr_duration'] / datetime.timedelta(minutes=1)) / 60 / 24) - - repo_dict = {repo_id: pr_duration_frame.loc[pr_duration_frame['repo_id'] == repo_id].iloc[0]['repo_name']} - - red_green_gradient = linear_gradient('#0080FF', '#DC143C', 150)['hex'] # 32CD32 - - driver_df = pr_duration_frame.copy() - - driver_df[y_axis] = driver_df[y_axis].astype(str) - - # add new group by + xaxis column - driver_df['grouped_x'] = driver_df[x_axis] + ' - ' + driver_df[group_by] - - driver_df_mean = driver_df.groupby(['grouped_x', y_axis], as_index=False).mean() - - colors = red_green_gradient - y_groups = driver_df_mean[y_axis].unique() - x_groups = sorted(driver_df[x_axis].unique()) - grouped_x_groups = sorted(driver_df_mean['grouped_x'].unique()) - - # defualt outliers removed to 0 - outliers_removed = 0 - - if remove_outliers == "true": - driver_df_mean, outliers_removed, 
outlier_cutoff = remove_outliers_by_standard_deviation(driver_df_mean, - heat_field) - - values = driver_df_mean[heat_field].values.tolist() - - heat_max = max(values) * 1.02 - - mapper = LinearColorMapper(palette=colors, low=driver_df_mean[heat_field].min(), - high=heat_max) # driver_df_mean[heat_field].max()) - - source = ColumnDataSource(driver_df_mean) - title_beginning = repo_dict[repo_id] + ':' - plot_width = 1100 - p = figure(plot_width=plot_width, plot_height=300, - title="{} Mean Duration (Days) {} Pull Requests".format(title_beginning, description), - y_range=grouped_x_groups[::-1], x_range=y_groups, - toolbar_location=None, tools="") # , x_axis_location="above") - - for x_group in x_groups: - outliers = driver_df_mean.loc[ - (driver_df_mean[heat_field] > heat_max) & (driver_df_mean['grouped_x'].str.contains(x_group))] - - if outliers_removed > 0: - p.add_layout(Title( - text="** Outliers capped at {} days: {} outlier(s) for {} were capped at {} **".format( - outlier_cutoff, outliers_removed, x_group, outlier_cutoff), align="center"), "below") - - p.rect(x=y_axis, y='grouped_x', width=1, height=1, source=source, - line_color=None, fill_color=transform(heat_field, mapper)) - - color_bar = ColorBar(color_mapper=mapper, location=(0, 0), - ticker=BasicTicker(desired_num_ticks=9), - formatter=PrintfTickFormatter(format="%d")) - - p.add_layout(color_bar, 'right') - - p.title.align = "center" - p.title.text_font_size = "16px" - - p.axis.axis_line_color = None - p.axis.major_tick_line_color = None - p.axis.major_label_text_font_size = "11pt" - p.axis.major_label_standoff = 0 - p.xaxis.major_label_orientation = 1.0 - p.xaxis.axis_label = 'Month Closed' if y_axis[0:6] == 'closed' else 'Date Created' if y_axis[ - 0:7] == 'created' else 'Repository' if y_axis == 'repo_name' else '' - # p.yaxis.axis_label = 'Merged Status' - - p.title.text_font_size = "16px" - - p.xaxis.axis_label_text_font_size = "16px" - p.xaxis.major_label_text_font_size = "14px" - - p.yaxis.major_label_text_font_size = "15px" - - plot = p - - p = figure(width=plot_width, height=200, margin=(0, 0, 0, 0)) - caption = "This graph shows the average duration of all closed pull requests. " \ - "Red represents a slow response relative to the others, while blue a light blue " \ - "represents a fast response relative to the others. Blank cells represents months " \ - "without pull requests." 
- p = add_caption_to_plot(p, caption) - caption_plot = p - - grid = gridplot([[plot], [caption_plot]]) - - if return_json == "true": - var = Response(response=json.dumps(json_item(grid, "Average_PR_duration")), - mimetype='application/json', - status=200) - - var.headers["Access-Control-Allow-Orgin"] = "*" - - return var - - # opts = FirefoxOptions() - # opts.add_argument("--headless") - # driver = webdriver.Firefox(firefox_options=opts) - # newt = get_screenshot_as_png(grid, timeout=180, webdriver=selenium.webdriver.firefox.webdriver) - # filename = export_png(grid, timeout=180, webdriver=selenium.webdriver.firefox.webdriver) - filename = export_png(grid, timeout=180) - - # return sendfile(newt) - return send_file(filename) diff --git a/augur/api/view/init.py b/augur/api/view/init.py index 869b383a62..1737131352 100644 --- a/augur/api/view/init.py +++ b/augur/api/view/init.py @@ -19,9 +19,6 @@ def init_settings(): settings["cache_expiry"] = 604800 settings["serving"] = "http://augur.chaoss.io/api/unstable" settings["pagination_offset"] = 25 - # Put reports.yml in the same directory as the config file - config_dir = configFile.parent - settings["reports"] = os.path.join(config_dir, "reports.yml") settings["session_key"] = secrets.token_hex() def write_settings(current_settings): @@ -33,63 +30,6 @@ def write_settings(current_settings): with open(configFile, 'w') as file: yaml.dump(current_settings, file) -# default reports definition -reports = { - "pull_request_reports":[ - { - "url":"average_commits_per_PR", - "description":"Average commits per pull request" - }, - { - "url":"average_comments_per_PR", - "description":"Average comments per pull request" - }, - { - "url":"PR_counts_by_merged_status", - "description":"Pull request counts by merged status" - }, - { - "url":"mean_response_times_for_PR", - "description":"Mean response times for pull requests" - }, - { - "url":"mean_days_between_PR_comments", - "description":"Mean days between pull request comments" - }, - { - "url":"PR_time_to_first_response", - "description":"Pull request time until first response" - }, - { - "url":"average_PR_events_for_closed_PRs", - "description":"Average pull request events for closed pull requests" - }, - { - "url":"Average_PR_duration", - "description":"Average pull request duration" - } - ], - "contributor_reports":[ - { - "url":"new_contributors_bar", - "description":"New contributors bar graph" - }, - { - "url":"returning_contributors_pie_chart", - "description":"Returning contributors pie chart" - } - ], - "contributor_reports_stacked":[ - { - "url":"new_contributors_stacked_bar", - "description":"New contributors stacked bar chart" - }, - { - "url":"returning_contributors_stacked_bar", - "description":"Returning contributors stacked bar chart" - } - ] -} # Initialize logging def init_logging(): diff --git a/augur/api/view/routes.py b/augur/api/view/routes.py index 00d456733f..91d23531b4 100644 --- a/augur/api/view/routes.py +++ b/augur/api/view/routes.py @@ -221,13 +221,9 @@ def user_settings(): """ @app.route('/repos/views/repo/') def repo_repo_view(id): - # For some reason, there is no reports definition (shouldn't be possible) - if reports is None: - return render_message("Report Definitions Missing", "You requested a report for a repo on this instance, but a definition for the report layout was not found.") - repo = Repo.get_by_id(db_session, id) - return render_module("repo-info", reports=reports.keys(), images=reports, title="Repo", repo=repo, repo_id=id) + return render_module("repo-info", 
title="Repo", repo=repo, repo_id=id) """ ---------------------------------------------------------------- default: diff --git a/augur/api/view/utils.py b/augur/api/view/utils.py index aae5140cd7..dbfdd1b121 100644 --- a/augur/api/view/utils.py +++ b/augur/api/view/utils.py @@ -70,34 +70,6 @@ def getSetting(key, section = "View"): #version_check(settings) -""" ---------------------------------------------------------------- -""" -def loadReports(): - global reports - try: - with open(getSetting("reports")) as file: - reports = yaml.load(file, Loader=yaml.FullLoader) - id = -1 - for report in reports: - for image in reports[report]: - image['id'] = id = id + 1 - return True - except Exception as err: - logger.error(f"An exception occurred reading reports endpoints from [{getSetting('reports')}]:") - logger.error(err) - try: - with open(getSetting("reports"), 'w') as file: - logger.info("Attempting to generate default reports.yml") - yaml.dump(reports, file) - logger.info("Default reports file successfully generated.") - except Exception as ioErr: - logger.error("Error creating default report configuration:") - logger.error(ioErr) - return False - -if not loadReports(): - loadReports() - cache_files_requested = [] """ ---------------------------------------------------------------- @@ -160,58 +132,6 @@ def download(url, cmanager, filename, image_cache, image_id, repo_id = None): logger.error("An exception occurred writing a cache file to disk") logger.error(err) -""" ---------------------------------------------------------------- -""" -def requestReports(repo_id): - # If this request has already been fulfilled, no need to process it again - if(repo_id in report_requests.keys()): - return - - # initialize a new request entry to hold the resulting data - report_requests[repo_id] = {} - report_requests[repo_id]['complete'] = False - - host = getSetting("host", "Server") - port = getSetting("port", "Server") - - """ ---------- - If the report definition could not be loaded, we cannot determine what - files to request from the backend to compose the report. Returning here - causes the completion status of the request to be False, which will - display an error message when sent to the frontend. 
- """ - if reports is None: - return - - threadPools = [] - reportImages = {} - for report in reports: - # Reports is a dictionary of lists, so we get the size of each list - size = len(reports[report]) - - # Set up various threading components to manage image downloading - connection_mgr = urllib3.PoolManager(maxsize=size) - thread_pool = ThreadPoolExecutor(size) - threadPools.append(thread_pool) - - for image in reports[report]: - # Where should the downloaded image be stored (in cache) - filename = toCacheFilename(f"{image['url']}?repo_id={repo_id}") - # Where are we downloading the image from - image_url = f"{host}:{port}" + url_for(image['url'], repo_id = repo_id) - # f"{getSetting('serving')}/{image['url']}?repo_id={repo_id}" - - # Add a request for this image to the thread pool using the download function - thread_pool.submit(download, image_url, connection_mgr, filename, reportImages, image['id'], repo_id) - - # Wait for all connections to resolve, then clean up - for thread_pool in threadPools: - thread_pool.shutdown() - - report_requests[repo_id]['images'] = reportImages - - # Remove the request from the queue when completed - report_requests[repo_id]['complete'] = True """ ---------------------------------------------------------------- renderRepos: diff --git a/augur/application/cli/github.py b/augur/application/cli/github.py index 4896bf05fb..0fa1f2967c 100644 --- a/augur/application/cli/github.py +++ b/augur/application/cli/github.py @@ -56,7 +56,7 @@ def update_api_key(): core_reset_header = "Core Reset Time" graphql_request_header = "Graphql Requests Left" graphql_reset_header = "Graphql Reset Time" - print(f"{'Key'.center(40)} {core_request_header} {core_reset_header} {graphql_request_header} {graphql_reset_header}") + print(f"{'Key'.center(40)} {core_request_header} {core_reset_header.center(24)} {graphql_request_header} {graphql_reset_header.center(24)}") for key, core_key_data, graphql_key_data in valid_key_data: core_requests = str(core_key_data['requests_remaining']).center(len(core_request_header)) core_reset_time = str(epoch_to_local_time_with_am_pm(core_key_data["reset_epoch"])).center(len(core_reset_header)) @@ -87,13 +87,12 @@ def update_api_key(): engine.dispose() - def epoch_to_local_time_with_am_pm(epoch): - local_time = datetime.fromtimestamp(epoch) - formatted_time = local_time.strftime('%I:%M %p') # This format includes the date as well + # Convert epoch to local time with timezone awareness + local_time = datetime.fromtimestamp(epoch).astimezone() + formatted_time = local_time.strftime('%I:%M %p %Z (UTC%z)').center(24) return formatted_time - def find_duplicates(lst): counter = Counter(lst) return [item for item, count in counter.items() if count > 1] diff --git a/augur/application/config.py b/augur/application/config.py index e3e93302eb..2cc6f65cdb 100644 --- a/augur/application/config.py +++ b/augur/application/config.py @@ -4,7 +4,8 @@ from typing import List, Any, Optional import os from augur.application.db.models import Config -from augur.application.db.util import execute_session_query +from augur.application.db.util import execute_session_query, convert_type_of_value +from pathlib import Path def get_development_flag_from_config(): @@ -50,7 +51,8 @@ def get_development_flag(): "pull_repos": 1, "rebuild_caches": 1, "run_analysis": 1, - "run_facade_contributors": 1 + "run_facade_contributors": 1, + "facade_contributor_full_recollect": 0 }, "Server": { "cache_expire": "3600", @@ -109,35 +111,6 @@ def get_development_flag(): } -def 
convert_type_of_value(config_dict, logger=None): - - data_type = config_dict["type"] - - if data_type == "str" or data_type is None: - return config_dict - - elif data_type == "int": - config_dict["value"] = int(config_dict["value"]) - - elif data_type == "bool": - value = config_dict["value"] - - if value.lower() == "false": - config_dict["value"] = False - else: - config_dict["value"] = True - - elif data_type == "float": - config_dict["value"] = float(config_dict["value"]) - - else: - if logger: - logger.error(f"Need to add support for {data_type} types to config") - else: - print(f"Need to add support for {data_type} types to config") - - return config_dict - class AugurConfig(): from augur.application.db.session import DatabaseSession @@ -150,7 +123,12 @@ def __init__(self, logger, session: DatabaseSession): self.logger = logger self.accepted_types = ["str", "bool", "int", "float", "NoneType"] - self.default_config = default_config + config_dir = Path(os.getenv("CONFIG_DATADIR", "./")) + config_path = config_dir.joinpath("augur.json") + if config_path.exists(): + self.default_config = json.loads(config_path.read_text(encoding="UTF-8")) + else: + self.default_config = default_config def get_section(self, section_name) -> dict: """Get a section of data from the config. diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index de0d9aaa81..eaa99fd394 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -457,7 +457,7 @@ def extract_needed_gitlab_issue_label_data(labels: List[dict], repo_id: int, too -def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: """ Retrieve only the needed data for pr labels from the api response @@ -487,7 +487,7 @@ def extract_needed_issue_message_ref_data(message: dict, issue_id: int, repo_id: return message_ref_dict # retrieve only the needed data for pr labels from the api response -def extract_needed_pr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_pr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: message_ref_dict = { 'pull_request_id': pull_request_id, @@ -1128,7 +1128,7 @@ def extract_needed_mr_metadata(mr_dict, repo_id, pull_request_id, tool_source, t return all_meta -def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_gitlab_issue_message_ref_data(message: dict, issue_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: """ Extract the message id for a given message on an issue from an api response and connect it to the relevant repo id. 
@@ -1190,7 +1190,7 @@ def extract_needed_gitlab_message_data(comment: dict, platform_id: int, repo_id: return comment_dict -def extract_needed_gitlab_mr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> List[dict]: +def extract_needed_gitlab_mr_message_ref_data(comment: dict, pull_request_id: int, repo_id: int, tool_source: str, tool_version: str, data_source: str) -> dict: """ Retrieve only the needed data for pr labels from the api response diff --git a/augur/application/db/engine.py b/augur/application/db/engine.py index 2870909093..0ea2bc1730 100644 --- a/augur/application/db/engine.py +++ b/augur/application/db/engine.py @@ -10,7 +10,7 @@ from augur.application.db.util import catch_operational_error -def parse_database_string(db_string: str) -> str: +def parse_database_string(db_string: str) -> tuple[str,str, str, str, str]: """Parse database string into the following components: username, password, host, port, database """ diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index b4004d7734..09820168fc 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -13,41 +13,11 @@ from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus, UserGroup, RepoGroup from augur.tasks.util.collection_state import CollectionState from augur.application.db import get_session, get_engine -from augur.application.db.util import execute_session_query +from augur.application.db.util import execute_session_query, convert_type_of_value from augur.application.db.session import remove_duplicates_by_uniques, remove_null_characters_from_list_of_dicts logger = logging.getLogger("db_lib") -def convert_type_of_value(config_dict, logger=None): - - - data_type = config_dict["type"] - - if data_type == "str" or data_type is None: - return config_dict - - if data_type == "int": - config_dict["value"] = int(config_dict["value"]) - - elif data_type == "bool": - value = config_dict["value"] - - if value.lower() == "false": - config_dict["value"] = False - else: - config_dict["value"] = True - - elif data_type == "float": - config_dict["value"] = float(config_dict["value"]) - - else: - if logger: - logger.error(f"Need to add support for {data_type} types to config") - else: - print(f"Need to add support for {data_type} types to config") - - return config_dict - def get_section(section_name) -> dict: """Get a section of data from the config. @@ -247,32 +217,52 @@ def facade_bulk_insert_commits(logger, records): session.rollback() if len(records) > 1: - logger.error(f"Ran into issue when trying to insert commits \n Error: {e}") - #split list into halves and retry insert until we isolate offending record firsthalfRecords = records[:len(records)//2] secondhalfRecords = records[len(records)//2:] facade_bulk_insert_commits(logger, firsthalfRecords) facade_bulk_insert_commits(logger, secondhalfRecords) - elif len(records) == 1 and isinstance(e,DataError) and "time zone displacement" in f"{e}": + elif len(records) == 1: commit_record = records[0] #replace incomprehensible dates with epoch. 
#2021-10-11 11:57:46 -0500 # placeholder_date = "1970-01-01 00:00:15 -0500" - placeholder_date = commit_record['author_timestamp'] + placeholder_date = commit_record['cmt_author_timestamp'] + + postgres_valid_timezones = { + -1200, -1100, -1000, -930, -900, -800, -700, + -600, -500, -400, -300, -230, -200, -100, 000, + 100, 200, 300, 330, 400, 430, 500, 530, 545, 600, + 630, 700, 800, 845, 900, 930, 1000, 1030, 1100, 1200, + 1245, 1300, 1400 + } # Reconstruct timezone portion of the date string to UTC - placeholder_date = re.split("[-+]", placeholder_date) - placeholder_date.pop() - placeholder_date = "-".join(placeholder_date) + "+0000" + placeholder_date_segments = re.split(" ", placeholder_date) + tzdata = placeholder_date_segments.pop() + + if ":" in tzdata: + tzdata = tzdata.replace(":", "") + + if int(tzdata) not in postgres_valid_timezones: + tzdata = "+0000" + else: + raise e + + placeholder_date_segments.append(tzdata) + + placeholder_date = " ".join(placeholder_date_segments) #Check for improper utc timezone offset #UTC timezone offset should be between -14:00 and +14:00 - commit_record['author_timestamp'] = placeholder_date - commit_record['committer_timestamp'] = placeholder_date + # analyzecommit.generate_commit_record() defines the keys on the commit_record dictionary + commit_record['cmt_author_timestamp'] = placeholder_date + commit_record['cmt_committer_timestamp'] = placeholder_date + + logger.warning(f"commit with invalid timezone set to UTC: {commit_record['cmt_commit_hash']}") session.execute( s.insert(Commit), @@ -281,31 +271,32 @@ def facade_bulk_insert_commits(logger, records): session.commit() else: raise e - -def batch_insert_contributors(logger, data: Union[List[dict], dict]) -> Optional[List[dict]]: - - batch_size = 1000 +def batch_insert_contributors(logger, data: Union[List[dict], dict], batch_size = 1000) -> Optional[List[dict]]: for i in range(0, len(data), batch_size): batch = data[i:i + batch_size] bulk_insert_dicts(logger, batch, Contributor, ['cntrb_id']) + + return None -def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: +def bulk_insert_dicts(logger, data_input: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: - if isinstance(data, list) is False: + if isinstance(data_input, list) is False: # if a dict is passed to data then # convert it to a list with one value - if isinstance(data, dict) is True: - data = [data] + if isinstance(data_input, dict) is True: + data = [data_input] else: logger.error("Data must be a list or a dict") return None + else: + data = list(data_input) if len(data) == 0: # self.logger.info("Gave no data to insert, returning...") @@ -407,8 +398,9 @@ def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys if deadlock_detected is True: logger.error("Made it through even though Deadlock was detected") - - return "success" + + # success + return None # othewise it gets the requested return columns and returns them as a list of dicts diff --git a/augur/application/db/models/augur_data.py b/augur/application/db/models/augur_data.py index 9212bcc5e9..c80077d9b6 100644 --- a/augur/application/db/models/augur_data.py +++ b/augur/application/db/models/augur_data.py @@ -939,7 +939,7 @@ 
def is_valid_github_repo(gh_session, url: str) -> bool: ) wait_until_time = localtime(wait_until) logger.error(f"rate limited fetching {url}") - logger.error(f"sleeping until {wait_until_time.tm_hour}:{wait_until_time.tm_min} ({wait_in_seconds} seconds)") + logger.error(f"sleeping until {wait_until_time.tm_hour:02d}:{wait_until_time.tm_min:02d} ({wait_in_seconds} seconds)") sleep(wait_in_seconds) attempts+=1 continue @@ -2885,7 +2885,7 @@ class PullRequestAssignee(Base): @classmethod def from_github(cls, assignee, repo_id, tool_source, tool_version, data_source): - pr_assignee_ojb = cls() + pr_assignee_obj = cls() # store the pr_url data on in the pr assignee data for now so we can relate it back to a pr later pr_assignee_obj.contrib_id = assignee["cntrb_id"] @@ -3600,4 +3600,4 @@ class RepoClone(Base): count_clones = Column(BigInteger) clone_data_timestamp = Column(TIMESTAMP(precision=6)) - repo = relationship("Repo") \ No newline at end of file + repo = relationship("Repo") diff --git a/augur/application/db/session.py b/augur/application/db/session.py index a26fc172b7..661e989dd4 100644 --- a/augur/application/db/session.py +++ b/augur/application/db/session.py @@ -93,18 +93,20 @@ def fetchall_data_from_sql_text(self,sql_text): result = connection.execute(sql_text) return [dict(row) for row in result.mappings()] - def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: + def insert_data(self, data_input: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]: - if isinstance(data, list) is False: + if isinstance(data_input, list) is False: # if a dict is passed to data then # convert it to a list with one value - if isinstance(data, dict) is True: - data = [data] + if isinstance(data_input, dict) is True: + data = [data_input] else: self.logger.info("Data must be a list or a dict") return None + else: + data = list(data_input) if len(data) == 0: # self.logger.info("Gave no data to insert, returning...") @@ -166,7 +168,7 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s # if there is no data to return then it executes the insert then returns nothing if not return_columns: - + # TODO: duplicate-looking code alert while attempts < 10: try: #begin keyword is needed for sqlalchemy 2.x @@ -205,8 +207,9 @@ def insert_data(self, data: Union[List[dict], dict], table, natural_keys: List[s if deadlock_detected is True: self.logger.error("Made it through even though Deadlock was detected") - - return "success" + + # success + return None # othewise it gets the requested return columns and returns them as a list of dicts diff --git a/augur/application/db/util.py b/augur/application/db/util.py index 9fa49ab00d..81f24ea6dd 100644 --- a/augur/application/db/util.py +++ b/augur/application/db/util.py @@ -58,3 +58,33 @@ def convert_orm_list_to_dict_list(result): return new_list + + +def convert_type_of_value(config_dict, logger=None): + + data_type = config_dict["type"] + + if data_type == "str" or data_type is None: + return config_dict + + elif data_type == "int": + config_dict["value"] = int(config_dict["value"]) + + elif data_type == "bool": + value = config_dict["value"] + + if value.lower() == "false": + config_dict["value"] = False + else: + 
config_dict["value"] = True + + elif data_type == "float": + config_dict["value"] = float(config_dict["value"]) + + else: + if logger: + logger.error(f"Need to add support for {data_type} types to config") + else: + print(f"Need to add support for {data_type} types to config") + + return config_dict \ No newline at end of file diff --git a/augur/application/schema/alembic/versions/34_add_contrib_to_config.py b/augur/application/schema/alembic/versions/34_add_contrib_to_config.py new file mode 100644 index 0000000000..1a87be365e --- /dev/null +++ b/augur/application/schema/alembic/versions/34_add_contrib_to_config.py @@ -0,0 +1,57 @@ +"""Add Facade contributor full recollect to config, default to off (0) + +Revision ID: 34 +Revises: 33 +Create Date: 2025-10-09 12:03:57.171011 + +""" +from alembic import op +from augur.application.db.session import DatabaseSession +from augur.application.config import * +from sqlalchemy.sql import text +import logging + +# revision identifiers, used by Alembic. +revision = '34' +down_revision = '33' +branch_labels = None +depends_on = None + +logger = logging.getLogger(__name__) + +def upgrade(): + + with DatabaseSession(logger) as session: + config = AugurConfig(logger,session) + config_dict = config.load_config() + + #Update the missing fields of the facade section in the config + section = config_dict.get("Facade") + + #Just copy the default if section doesn't exist. + if section: + if 'facade_contributor_full_recollect' not in section.keys(): + section['facade_contributor_full_recollect'] = 0 + + else: + section = config.default_config["Facade"] + + config.add_section_from_json("Facade", section) + + +def downgrade(): + + conn = op.get_bind() + + conn.execute(text(f""" + DELETE FROM augur_operations.config + WHERE section_name='Facade' AND (setting_name='facade_contributor_full_recollect'); + """)) + + try: + conn.execute(text(f""" + DELETE FROM augur_operations.config + WHERE section_name='Facade' AND (setting_name='facade_contributor_full_recollect'); + """)) + except: + pass \ No newline at end of file diff --git a/augur/application/schema/repo_load_sample.csv b/augur/application/schema/repo_load_sample.csv index b04519f30f..fb537d4949 100644 --- a/augur/application/schema/repo_load_sample.csv +++ b/augur/application/schema/repo_load_sample.csv @@ -1,8 +1,8 @@ -10,https://github.com/chaoss/augur.git -10,https://github.com/chaoss/grimoirelab.git -20,https://github.com/chaoss/wg-evolution.git -20,https://github.com/chaoss/wg-risk.git -20,https://github.com/chaoss/wg-common.git -20,https://github.com/chaoss/wg-value.git -20,https://github.com/chaoss/wg-diversity-inclusion.git -20,https://github.com/chaoss/wg-app-ecosystem.git +https://github.com/chaoss/augur.git,10 +https://github.com/chaoss/grimoirelab.git,10 +https://github.com/chaoss/wg-evolution.git,20 +https://github.com/chaoss/wg-risk.git,20 +https://github.com/chaoss/wg-common.git,20 +https://github.com/chaoss/wg-value.git,20 +https://github.com/chaoss/wg-diversity-inclusion.git,20 +https://github.com/chaoss/wg-app-ecosystem.git,20 diff --git a/augur/application/util.py b/augur/application/util.py index 03e591df98..af11d7d367 100644 --- a/augur/application/util.py +++ b/augur/application/util.py @@ -24,4 +24,4 @@ def get_all_repos_count(**kwargs): result = controller.get_repo_count(source="all", **kwargs) - return result + return result \ No newline at end of file diff --git a/augur/tasks/git/facade_tasks.py b/augur/tasks/git/facade_tasks.py index ce03524e0f..5baaed20d4 100644 --- 
a/augur/tasks/git/facade_tasks.py +++ b/augur/tasks/git/facade_tasks.py @@ -112,8 +112,6 @@ def trim_commits_post_analysis_facade_task(repo_git): repo = repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id - start_date = facade_helper.get_setting('start_date') - logger.info(f"Generating sequence for repo {repo_id}") repo = get_repo_by_repo_git(repo_git) @@ -123,7 +121,7 @@ def trim_commits_post_analysis_facade_task(repo_git): repo_loc = (f"{absolute_path}/.git") # Grab the parents of HEAD - parent_commits = get_parent_commits_set(repo_loc, start_date) + parent_commits = get_parent_commits_set(repo_loc) # Grab the existing commits from the database existing_commits = get_existing_commits_set(repo_id) @@ -237,8 +235,6 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id - start_date = facade_helper.get_setting('start_date') - logger.info(f"Generating sequence for repo {repo_id}") repo = get_repo_by_repo_id(repo_id) @@ -248,7 +244,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None: repo_loc = (f"{absolute_path}/.git") # Grab the parents of HEAD - parent_commits = get_parent_commits_set(repo_loc, start_date) + parent_commits = get_parent_commits_set(repo_loc) # Grab the existing commits from the database existing_commits = get_existing_commits_set(repo_id) @@ -438,11 +434,6 @@ def generate_analysis_sequence(logger,repo_git, facade_helper): analysis_sequence = [] - #repo_list = s.sql.text("""SELECT repo_id,repo_group_id,repo_path,repo_name FROM repo WHERE repo_git=:value""").bindparams(value=repo_git) - #repos = fetchall_data_from_sql_text(repo_list) - - start_date = facade_helper.get_setting('start_date') - #repo_ids = [repo['repo_id'] for repo in repos] #repo_id = repo_ids.pop(0) @@ -473,8 +464,6 @@ def facade_phase(repo_git, full_collection): #repo_list = s.sql.text("""SELECT repo_id,repo_group_id,repo_path,repo_name FROM repo WHERE repo_git=:value""").bindparams(value=repo_git) #repos = fetchall_data_from_sql_text(repo_list) - start_date = facade_helper.get_setting('start_date') - #repo_ids = [repo['repo_id'] for repo in repos] #repo_id = repo_ids.pop(0) @@ -491,7 +480,6 @@ def facade_phase(repo_git, full_collection): #force_analysis = session.force_analysis run_facade_contributors = facade_helper.run_facade_contributors - facade_sequence = [] facade_core_collection = [] if not limited_run or (limited_run and pull_repos): @@ -509,14 +497,12 @@ def facade_phase(repo_git, full_collection): #These tasks need repos to be cloned by facade before they can work. 
- facade_sequence.append( - group( - chain(*facade_core_collection), - process_dependency_metrics.si(repo_git), - process_libyear_dependency_metrics.si(repo_git), - process_scc_value_metrics.si(repo_git) - ) + facade_sequence = group( + chain(*facade_core_collection), + process_dependency_metrics.si(repo_git), + process_libyear_dependency_metrics.si(repo_git), + process_scc_value_metrics.si(repo_git) ) logger.info(f"Facade sequence: {facade_sequence}") - return chain(*facade_sequence) \ No newline at end of file + return facade_sequence \ No newline at end of file diff --git a/augur/tasks/git/util/facade_worker/facade_worker/config.py b/augur/tasks/git/util/facade_worker/facade_worker/config.py index c62034a94e..f060b34390 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/config.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/config.py @@ -127,6 +127,7 @@ def __init__(self,logger: Logger): self.rebuild_caches = worker_options["rebuild_caches"] self.multithreaded = worker_options["multithreaded"] self.create_xlsx_summary_files = worker_options["create_xlsx_summary_files"] + self.facade_contributor_full_recollect = worker_options["facade_contributor_full_recollect"] self.tool_source = "Facade" self.data_source = "Git Log" @@ -166,7 +167,13 @@ def log_activity(self, level, status): # Log an activity based upon urgency and user's preference. If the log level is # "Debug", then just print it and don't save it in the database. log_options = ('Error','Quiet','Info','Verbose','Debug') - self.logger.info(f"* {status}\n") + logmsg = f"* {status}\n" + if level == "Error": + self.logger.error(logmsg) + elif level == "Debug" or level == "Verbose": + self.logger.debug(logmsg) + else: + self.logger.info(logmsg) #Return if only debug if level == 'Debug': @@ -244,6 +251,17 @@ def insert_or_update_data(self, query, **bind_args)-> None: return def inc_repos_processed(self): self.repos_processed += 1 + +# def get_last_collected_commit_date(self,repo_id): +# commit_date_query = s.sql.text(""" +# SELECT cmt_committer_timestamp FROM commits +# WHERE repo_id=:repo_id +# ORDER BY data_collection_date DESC +# LIMIT 1; +# """).bindparams(repo_id=repo_id) +# +# result = execute_sql(commit_date_query).fetchone() +# return result[0] """ class FacadeConfig: diff --git a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py index 874f338902..f754f4e098 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/repofetch.py @@ -435,7 +435,7 @@ def git_repo_updates(facade_helper, repo_git): cmdpull2 = (f"git -C {absolute_path} pull") - cmd_reset = (f"git -C {absolute_path} reset --hard origin") + cmd_reset = (f"git -C {absolute_path} reset --hard origin/{remotedefault}") cmd_reset_wait = subprocess.Popen( [cmd_reset], shell=True).wait() diff --git a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py index caae6c02ba..c06614ac7d 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py @@ -105,10 +105,10 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name): return f"{repo_base_dir}{repo_id}-{repo_path}/{repo_name}" -def get_parent_commits_set(absolute_repo_path, start_date): +def get_parent_commits_set(absolute_repo_path): parents = subprocess.Popen(["git 
--git-dir %s log --ignore-missing " - "--pretty=format:'%%H' --since=%s" % (absolute_repo_path,start_date)], + "--pretty=format:'%%H'" % (absolute_repo_path)], stdout=subprocess.PIPE, shell=True) parent_commits = set(parents.stdout.read().decode("utf-8",errors="ignore").split(os.linesep)) diff --git a/augur/tasks/github/facade_github/tasks.py b/augur/tasks/github/facade_github/tasks.py index 1b11f98223..eff64df6ee 100644 --- a/augur/tasks/github/facade_github/tasks.py +++ b/augur/tasks/github/facade_github/tasks.py @@ -8,6 +8,7 @@ from augur.application.db.models import Contributor from augur.tasks.github.facade_github.core import * from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors +from augur.application.db.lib import get_session, execute_session_query from augur.tasks.git.util.facade_worker.facade_worker.facade00mainprogram import * @@ -198,6 +199,12 @@ def insert_facade_contributors(self, repo_git): logger = logging.getLogger(insert_facade_contributors.__name__) repo = get_repo_by_repo_git(repo_git) repo_id = repo.repo_id + facade_helper = FacadeHelper(logger) + + with get_session() as session: + query = session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo.repo_id) + collection_status = execute_session_query(query,'one') + last_collected_date = collection_status.facade_data_last_collected if not facade_helper.facade_contributor_full_recollect else None # Get all of the commit data's emails and names from the commit table that do not appear # in the contributors table or the contributors_aliases table. @@ -214,6 +221,7 @@ def insert_facade_contributors(self, repo_git): commits WHERE commits.repo_id = :repo_id + AND (:since_date is NULL OR commits.data_collection_date > :since_date) AND (NOT EXISTS ( SELECT contributors.cntrb_canonical FROM contributors WHERE contributors.cntrb_canonical = commits.cmt_author_raw_email ) or NOT EXISTS ( SELECT contributors_aliases.alias_email from contributors_aliases where contributors_aliases.alias_email = commits.cmt_author_raw_email) AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name )) @@ -231,6 +239,7 @@ def insert_facade_contributors(self, repo_git): commits WHERE commits.repo_id = :repo_id + AND (:since_date is NULL OR commits.data_collection_date > :since_date) AND EXISTS ( SELECT unresolved_commit_emails.email FROM unresolved_commit_emails WHERE unresolved_commit_emails.email = commits.cmt_author_raw_email ) AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name ) GROUP BY @@ -239,7 +248,7 @@ def insert_facade_contributors(self, repo_git): commits.cmt_author_raw_email ORDER BY hash - """).bindparams(repo_id=repo_id) + """).bindparams(repo_id=repo_id,since_date=last_collected_date) #Execute statement with session. result = execute_sql(new_contrib_sql) @@ -257,7 +266,6 @@ def insert_facade_contributors(self, repo_git): logger.debug("DEBUG: Got through the new_contribs") - facade_helper = FacadeHelper(logger) # sql query used to find corresponding cntrb_id's of emails found in the contributor's table # i.e., if a contributor already exists, we use it! 
resolve_email_to_cntrb_id_sql = s.sql.text(""" @@ -271,6 +279,7 @@ def insert_facade_contributors(self, repo_git): commits WHERE contributors.cntrb_canonical = commits.cmt_author_raw_email + AND (:since_date is NULL OR commits.data_collection_date > :since_date) AND commits.repo_id = :repo_id UNION SELECT DISTINCT @@ -286,7 +295,8 @@ def insert_facade_contributors(self, repo_git): contributors_aliases.alias_email = commits.cmt_author_raw_email AND contributors.cntrb_id = contributors_aliases.cntrb_id AND commits.repo_id = :repo_id - """).bindparams(repo_id=repo_id) + AND (:since_date is NULL OR commits.data_collection_date > :since_date) + """).bindparams(repo_id=repo_id,since_date=last_collected_date) result = execute_sql(resolve_email_to_cntrb_id_sql) diff --git a/augur/tasks/github/releases/core.py b/augur/tasks/github/releases/core.py index 239b83dce9..255b34cf89 100644 --- a/augur/tasks/github/releases/core.py +++ b/augur/tasks/github/releases/core.py @@ -23,7 +23,7 @@ def get_release_inf(repo_id, release, tag_only): release_inf = { - 'release_id': release['id'], + 'release_id': str(release['id']).strip(), 'repo_id': repo_id, 'release_name': release['name'], 'release_description': release['description'] if release['description'] is not None else '', @@ -51,7 +51,7 @@ def get_release_inf(repo_id, release, tag_only): author = "nobody" date = "" release_inf = { - 'release_id': release['id'], + 'release_id': str(release['id']).strip(), 'repo_id': repo_id, 'release_name': release['name'], 'release_description': 'tag_only', @@ -67,17 +67,23 @@ def get_release_inf(repo_id, release, tag_only): def insert_release(session, logger, repo_id, owner, release, tag_only = False): - # Get current table values + # Get current table values with proper trimming logger.info('Getting release table values\n') query = session.query(Release.release_id).filter(Release.repo_id == repo_id) - release_id_data = execute_session_query(query, 'all')#pd.read_sql(release_id_data_sql, self.db, params={'repo_id': repo_id}) - release_id_data = [str(r_id).strip() for r_id in release_id_data]#release_id_data.apply(lambda x: x.str.strip()) + release_id_data = execute_session_query(query, 'all') + existing_release_ids = {str(r_id).strip() for r_id in release_id_data} # Put all data together in format of the table logger.info(f'Inserting release for repo with id:{repo_id}, owner:{owner}, release name:{release["name"]}\n') release_inf = get_release_inf(repo_id, release, tag_only) + + # Check if release already exists (with proper trimming) + new_release_id = str(release_inf['release_id']).strip() + if new_release_id in existing_release_ids: + logger.info(f"Release {new_release_id} already exists for repo {repo_id}, skipping insertion\n") + return - #Do an upsert + #Do an upsert with string field cleaning string_fields = ["release_name", "release_description", "release_author", "release_tag_name"] bulk_insert_dicts(logger, release_inf,Release,['release_id'], string_fields=string_fields) diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index db8d2239d4..d1209fadd0 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -241,8 +241,11 @@ def setup_periodic_tasks(sender, **kwargs): sender.add_periodic_task(thirty_days_in_seconds, non_repo_domain_tasks.s()) mat_views_interval = int(config.get_value('Celery', 'refresh_materialized_views_interval_in_days')) - logger.info(f"Scheduling refresh materialized view every night at 1am CDT") - 
sender.add_periodic_task(datetime.timedelta(days=mat_views_interval), refresh_materialized_views.s())
+    if mat_views_interval > 0:
+        logger.info(f"Scheduling materialized view refresh every {mat_views_interval} day(s)")
+        sender.add_periodic_task(datetime.timedelta(days=mat_views_interval), refresh_materialized_views.s())
+    else:
+        logger.info("Refresh materialized view task is disabled.")
 
     # logger.info(f"Scheduling update of collection weights on midnight each day")
     # sender.add_periodic_task(crontab(hour=0, minute=0),augur_collection_update_weights.s())
diff --git a/augur/tasks/util/collection_util.py b/augur/tasks/util/collection_util.py
index bed73bd120..28489d63c8 100644
--- a/augur/tasks/util/collection_util.py
+++ b/augur/tasks/util/collection_util.py
@@ -597,37 +597,33 @@ def send_messages(self):
        for repo_git, full_collection in col_hook.repo_list:
            repo = get_repo_by_repo_git(repo_git)
+            platform_name = "github"
+            # this needs to be here and not up a level since it should be set/reset for each repo.
+            # otherwise a gitlab repo would reset it and cause subsequent github repos to use gitlab phases.
+            phases = None
            if "github" in repo.repo_git:
-                augur_collection_sequence = []
-                for job in col_hook.phases:
-                    #Add the phase to the sequence in order as a celery task.
-                    #The preliminary task creates the larger task chain
-                    augur_collection_sequence.append(job(repo_git, full_collection))
-
-                #augur_collection_sequence.append(core_task_success_util.si(repo_git))
-                #Link all phases in a chain and send to celery
-                augur_collection_chain = chain(*augur_collection_sequence)
-                task_id = augur_collection_chain.apply_async().task_id
-
-                self.logger.info(f"Setting github repo {col_hook.name} status to collecting for repo: {repo_git}")
-
-                #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated
-                yield repo_git, task_id, col_hook.name
-            else:
-                if col_hook.gitlab_phases is not None:
-
-                    augur_collection_sequence = []
-                    for job in col_hook.gitlab_phases:
-                        #Add the phase to the sequence in order as a celery task.
-                        #The preliminary task creates the larger task chain
-                        augur_collection_sequence.append(job(repo_git, full_collection))
-
-                    #augur_collection_sequence.append(core_task_success_util.si(repo_git))
-                    #Link all phases in a chain and send to celery
-                    augur_collection_chain = chain(*augur_collection_sequence)
-                    task_id = augur_collection_chain.apply_async().task_id
-
-                    self.logger.info(f"Setting gitlab repo {col_hook.name} status to collecting for repo: {repo_git}")
-
-                    #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated
-                    yield repo_git, task_id, col_hook.name
+                phases = col_hook.phases
+                # use default platform name
+
+            elif "gitlab" in repo.repo_git:
+                platform_name = "gitlab"
+                if col_hook.gitlab_phases is None:
+                    continue
+                phases = col_hook.gitlab_phases
+
+            augur_collection_sequence = []
+            for job in phases:
+                #Add the phase to the sequence in order as a celery task.
+                #The preliminary task creates the larger task chain
+                augur_collection_sequence.append(job(repo_git, full_collection))
+
+            #augur_collection_sequence.append(core_task_success_util.si(repo_git))
+            #Link all phases in a chain and send to celery
+            augur_collection_chain = chain(*augur_collection_sequence)
+            task_id = augur_collection_chain.apply_async().task_id
+
+            self.logger.info(f"Setting {platform_name} repo {col_hook.name} status to collecting for repo: {repo_git}")
+
+            #yield the value of the task_id to the calling method so that the proper collectionStatus field can be updated
+            yield repo_git, task_id, col_hook.name
+
diff --git a/augur/tasks/util/random_key_auth.py b/augur/tasks/util/random_key_auth.py
index f2fea35b36..dc59544aef 100644
--- a/augur/tasks/util/random_key_auth.py
+++ b/augur/tasks/util/random_key_auth.py
@@ -3,23 +3,18 @@
 from httpx import Auth, Request, Response
 from random import choice
 
-
+from augur.util.keys import mask_key
 class RandomKeyAuth(Auth):
-    """Custom Auth class for httpx that randomly assigns an api key to each request
+    """Custom Auth class for httpx that randomly assigns an api key to each request.
 
     Attributes:
-        list_of_keys ([str]): list of keys which are randomly selected from on each request
-        header_name (str): name of header that the keys need to be set to
+        list_of_keys (List[str]): list of keys which are randomly selected from on each request
+        header_name (str): name of header that the keys need to be set to
         key_format (str): format string that defines the structure of the key and leaves a {} for the key to be inserted
     """
-
-    # pass a list of keys that are strings
-    # pass the name of the header that you would like to be set on the request
-    # Optionally pass the key_format. This is a string that contains a {} so the key can be added and applied to the header in the correct way.
-    # For example on github the keys are formatted like "token asdfasfdasf" where asdfasfdasf is the key.
So for github - # the key_format="token {0}" + def __init__(self, list_of_keys: List[str], header_name: str, logger, key_format: Optional[str] = None): self.list_of_keys = list_of_keys self.header_name = header_name @@ -27,27 +23,26 @@ def __init__(self, list_of_keys: List[str], header_name: str, logger, key_format self.logger = logger def auth_flow(self, request: Request) -> Generator[Request, Response, None]: + """Attach a randomly selected API key to the request headers.""" + if not self.list_of_keys: + self.logger.error("No valid keys available to make a request.") + yield request + return + + key_value = choice(self.list_of_keys) - # the choice function is from the random library, and gets a random value from a list - # this gets a random key from the list - - if self.list_of_keys: - key_value = choice(self.list_of_keys) - self.logger.debug(f'Key value used in request: {key_value}') - # formats the key string into a format GitHub will accept - - if self.key_format: - key_string = self.key_format.format(key_value) - else: - key_string = key_value - - # set the headers of the request with the new key - request.headers[self.header_name] = key_string - #self.logger.info(f"List of Keys: {self.list_of_keys}") + # Log only masked or hashed form, never the full key + masked = mask_key(key_value) + self.logger.debug(f"Key used for request (masked): {masked}") + # Apply formatting if needed + if self.key_format: + key_string = self.key_format.format(key_value) else: - self.logger.error(f"There are no valid keys to make a request with: {self.list_of_keys}") + key_string = key_value - # sends the request back with modified headers + # Set header + request.headers[self.header_name] = key_string + # sends the request back with modified headers # basically it saves our changes to the request object yield request diff --git a/augur/templates/repo-info.j2 b/augur/templates/repo-info.j2 index 311daa45f7..2738d70e2a 100644 --- a/augur/templates/repo-info.j2 +++ b/augur/templates/repo-info.j2 @@ -5,21 +5,7 @@ {% if repo.repo_id %}

Report for: {{ repo.repo_name|title }}
{{ repo.repo_git }}
-    {% for report in reports %}
-    {{ report|replace("_", " ")|title }}
-        {% for image in images[report] %}
-        [report image/card markup deleted with the reports feature; the original tags were lost in extraction]
-        {% endfor %}
-    {% endfor %}
+
{% else %}
Repository {{ repo_id }} not found
{% endif %} diff --git a/augur/util/keys.py b/augur/util/keys.py new file mode 100644 index 0000000000..31ef63d0cb --- /dev/null +++ b/augur/util/keys.py @@ -0,0 +1,5 @@ +def mask_key(key: str, first: int = 6, last: int = 3, stars: int = 6) -> str: + """Mask key except for the first and last few characters.""" + if not isinstance(key, str) or len(key) <= (first + last): + return "*" * stars + return f"{key[:first]}{'*' * stars}{key[-last:]}" \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index b32f0a1696..f0ef41015b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -73,6 +73,7 @@ services: - REDIS_CONN_STRING=redis://redis:6379 - RABBITMQ_CONN_STRING=amqp://${AUGUR_RABBITMQ_USERNAME:-augur}:${AUGUR_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${AUGUR_RABBITMQ_VHOST:-augur_vhost} - CONFIG_LOCATION=/config/config.yml + - CONFIG_DATADIR=/config - CACHE_DATADIR=/cache - CACHE_LOCKDIR=/cache - CELERYBEAT_SCHEDULE_DB=/tmp/celerybeat-schedule.db diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index e627dc6681..5a8bfaaa3e 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -20,7 +20,9 @@ RUN go install github.com/ossf/scorecard/v5@v5.1.1 \ FROM python:3.11-slim-bullseye LABEL maintainer="outdoors@acm.org" -LABEL version="0.90.0" + +ARG VERSION +LABEL version=${VERSION} ENV DEBIAN_FRONTEND=noninteractive ENV PATH="/usr/bin/:/usr/local/bin:/usr/lib:${PATH}" diff --git a/docker/database/Dockerfile b/docker/database/Dockerfile index 50e7653af2..e4393fe0b3 100644 --- a/docker/database/Dockerfile +++ b/docker/database/Dockerfile @@ -2,7 +2,9 @@ FROM postgres:16 LABEL maintainer="outdoors@acm.org" -LABEL version="0.90.0" + +ARG VERSION +LABEL version=${VERSION} ENV POSTGRES_DB="test" ENV POSTGRES_USER="augur" diff --git a/docker/keyman/Dockerfile b/docker/keyman/Dockerfile index ed77ef18d4..3fe1996223 100644 --- a/docker/keyman/Dockerfile +++ b/docker/keyman/Dockerfile @@ -1,7 +1,9 @@ FROM python:3.11.12-alpine LABEL maintainer="outdoors@acm.org" -LABEL version="0.90.0" + +ARG VERSION +LABEL version=${VERSION} RUN pip install --no-cache-dir --upgrade pip diff --git a/docker/rabbitmq/Dockerfile b/docker/rabbitmq/Dockerfile index ad86dfebb7..69babd3be8 100644 --- a/docker/rabbitmq/Dockerfile +++ b/docker/rabbitmq/Dockerfile @@ -1,7 +1,9 @@ -FROM rabbitmq:3.12-management-alpine +FROM rabbitmq:4.1-management-alpine LABEL maintainer="574/augur@simplelogin.com" -LABEL version="0.90.0" + +ARG VERSION +LABEL version=${VERSION} ARG RABBIT_MQ_DEFAULT_USER=augur ARG RABBIT_MQ_DEFAULT_PASSWORD=password123 diff --git a/docker/rabbitmq/definitions.json b/docker/rabbitmq/definitions.json index 1cd8cc172e..d5fd9faef2 100644 --- a/docker/rabbitmq/definitions.json +++ b/docker/rabbitmq/definitions.json @@ -1,5 +1,5 @@ { - "rabbit_version": "3.12", + "rabbit_version": "4.1", "users": [ { "name": "", diff --git a/docs/source/conf.py b/docs/source/conf.py index 45966f19ec..94921bd5d1 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -23,9 +23,11 @@ here = os.path.abspath(os.path.dirname(__file__)) -exec(open(os.path.join(here, "../../metadata.py")).read()) +# Add the project root (two levels up: docs/source → augur) +sys.path.insert(0, os.path.abspath(os.path.join(here, '../..'))) -sys.path.insert(0, os.path.abspath('../../../augur')) +# Now importing variables from metadata.py +from metadata import __copyright__, __release__, __version__ # -- General configuration ------------------------------------------------ diff --git 
a/docs/source/development-guide/create-a-metric/api-development.rst b/docs/source/development-guide/create-a-metric/api-development.rst
index 834b42e8e0..05e1ebb977 100644
--- a/docs/source/development-guide/create-a-metric/api-development.rst
+++ b/docs/source/development-guide/create-a-metric/api-development.rst
@@ -133,10 +133,6 @@ There is also, generally, a block in a standard metric for pulling data by a rep
                                              'begin_date': begin_date, 'end_date': end_date})
     return results
-Existing Visualization Metrics Files:
---------------------------------------------
-1. augur/routes/contributor_reports.py
-2. augur/routes/pull_request_reports.py
 
 Existing Metrics Files:
 --------------------------------------------
diff --git a/docs/source/docker/docker-compose.rst b/docs/source/docker/docker-compose.rst
index d96476c392..1bc3a25a6f 100644
--- a/docs/source/docker/docker-compose.rst
+++ b/docs/source/docker/docker-compose.rst
@@ -26,7 +26,7 @@ This section of the documentation details how to use Augur's Docker Compose conf
 
 .. warning::
 
-   Don't forget to provide your external database credentials in a file called ``.env`` file. Make sure the following environment variables are specified.
+   Don't forget to provide your external database credentials in a file called ``.env``. Make sure all of the following environment variables are specified; keep placeholder values for any you don't need. Don't specify ``AUGUR_DB`` if you want the Docker database to be used.
 
 Example .env:
diff --git a/docs/source/docker/getting-started.rst b/docs/source/docker/getting-started.rst
index e747bbb304..c413d9ed45 100644
--- a/docs/source/docker/getting-started.rst
+++ b/docs/source/docker/getting-started.rst
@@ -31,7 +31,7 @@ the following resources (or more):
 
 - 10 GB RAM
 
 Clone the Augur repository and create a .env file in the top level directory
-with the following fields:
+with the following fields (don't remove any variable; keep placeholder values for any you don't need):
 
 .. code:: python
diff --git a/docs/source/docker/quick-start.rst b/docs/source/docker/quick-start.rst
index b7d7b7cc7e..c7530f6ae9 100644
--- a/docs/source/docker/quick-start.rst
+++ b/docs/source/docker/quick-start.rst
@@ -9,7 +9,7 @@ Before you get off to such a quick start, go ahead and
 
     git checkout main
 
-    4. Create a .env file in the top level directory with the following fields:
+    4. Create a .env file in the top level directory with the following fields (don't remove any variable; keep placeholder values for any you don't need):
 
 .. code:: python
diff --git a/docs/source/getting-started/command-line-interface/db.rst b/docs/source/getting-started/command-line-interface/db.rst
index b754f2e067..a810f1b9d7 100644
--- a/docs/source/getting-started/command-line-interface/db.rst
+++ b/docs/source/getting-started/command-line-interface/db.rst
@@ -78,14 +78,14 @@ Example usage\:
diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst
index d2a79c4f71..41bc1be4dc 100644
--- a/docs/source/getting-started/installation.rst
+++ b/docs/source/getting-started/installation.rst
@@ -31,9 +31,7 @@
 Required:

-**Python 3.9 is not yet supported because TensorFlow, which we use in our machine learning workers, does not yet support Python 3.9.**
-
-Our REST API & data collection workers write in Python 3.6. We query the GitHub & GitLab API to collect data about issues, pull requests, contributors, and other information about a repository, so GitLab and GitHub access tokens are **required** for data collection.
+Our REST API & data collection workers query the GitHub & GitLab API to collect data about issues, pull requests, contributors, and other information about a repository. GitLab and GitHub access tokens are **required** for data collection and must be provided (a placeholder token may be supplied for a platform you don't plan to use).

 Optional:
diff --git a/docs/source/getting-started/using-docker.rst b/docs/source/getting-started/using-docker.rst
index cc5e23896c..c1c693eda2 100644
--- a/docs/source/getting-started/using-docker.rst
+++ b/docs/source/getting-started/using-docker.rst
@@ -10,7 +10,7 @@ the following resources (or more).

 1. Clone the Augur repository https://github.com/chaoss/augur

-2. Create a .env file in the top level directory with the following fields:
+2. Create a ``.env`` file in the top level directory with the following fields (don't remove any variable; keep placeholder values for any you don't need):

 .. code:: python

@@ -35,7 +35,7 @@ or

     podman compose up --build

-And augur should be up and running! Over time, you may decide that you want to download and run newer releases of Augur. It is critical that your `.env` file remains configured to use the same database name and password; though you can change the password if you understand how to connect to a database running inside a Docker container on your computer.
+And Augur should be up and running! Over time, you may decide that you want to download and run newer releases of Augur. It is critical that your ``.env`` file remains configured to use the same database name and password, though you can change the password if you understand how to connect to a database running inside a Docker container on your computer.
 Rebuilding Augur in Docker
 ----------------------------
diff --git a/docs/source/procedures/creating-releases.rst b/docs/source/procedures/creating-releases.rst
index 007db72441..7ef2b32a9c 100644
--- a/docs/source/procedures/creating-releases.rst
+++ b/docs/source/procedures/creating-releases.rst
@@ -2,49 +2,109 @@ The Augur Release Process
 =========================

 The first step to releasing any changes is to have changes in the first place.
-Augur's `CONTRIBUTING.md `__ file contains all the information that is needed to get started with topics like reporting issues, contributing code, and understanding the code review process.
+Augur's `CONTRIBUTING.md `__ file
+contains all the information that is needed to get started with topics like
+reporting issues, contributing code, and understanding the code review process.

 This document outlines how these changes end up in an Augur release after they are merged into the `main` branch.

+Release Workflow
+----------------
-
-
-Release workflow:
-Starting after version 0.89.3, Augur follows a workflow similar to those you may already be familiar with (such as github flow and git flow). The Augur workflow has two long-lived branches, `main` and `release` and is designed such that changes only flow in one direction - from main into release.
+Starting after version **0.89.3**, Augur follows a workflow similar to those you may already
+be familiar with (such as GitHub Flow and Git Flow). The Augur workflow has two long-lived branches,
+`main` and `release`, and is designed such that changes only flow in one direction — from `main` into `release`.

 Branches
+--------
+
+**main**

-`main`
-The `main` branch is the primary development branch that is the target for all new pull requests. At any given point in time, this branch represents the best approximation of what the next upcoming release will look like. Since this is the active development branch, changes happen more frequently and this branch should be considered to be less stable than the `release` branch due to the possibility of breaking changes being made (and potentially reverted) between releases. It is not recommended for production deployment and is primarily intended for use by Augur contributors running their own copies against test data for development purposes.
+The `main` branch is the primary development branch that is the target for all new pull requests.
+At any given point in time, this branch represents the best approximation of what the next upcoming
+release will look like. Since this is the active development branch, changes happen more frequently
+and this branch should be considered to be less stable than the `release` branch due to the possibility
+of breaking changes being made (and potentially reverted) between releases. It is not recommended for
+production deployment and is primarily intended for use by Augur contributors running their own copies
+against test data for development purposes.

-`release`
-The `release` branch is where all augur versions (after 0.89.3) are tagged. Each commit on this branch represents either a hotfix to the prior release or a new major or minor version.
+**release**

-Currently, Augur only officially supports the last-released version represented by the latest **release** tag. In most cases, the latest commit on the `release` branch is made immediately prior to a release, but always rely on the latest tagged release, not the release branch in production.
+The `release` branch is where all Augur versions (after 0.89.3) are tagged. Each commit on this branch
+represents either a hotfix to the prior release or a new major or minor version.
+
+Currently, Augur only officially supports the last-released version represented by the latest **release** tag.
+In most cases, the latest commit on the `release` branch is made immediately prior to a release, but always rely
+on the latest tagged release, not the `release` branch, in production.

 .. note::
-    If future needs require supporting multiple Augur versions concurrently, individual numbered release branches may be made from this central `release`` branch to allow any hotfixes to be applied to each supported version independently of the others.
+    If future needs require supporting multiple Augur versions concurrently, individual numbered
+    release branches may be made from this central `release` branch to allow any hotfixes to be applied
+    to each supported version independently of the others.

 The Release Process
+-------------------
+
+When the next release is set to be cut, some preparation steps need to take place first. These include:
+
+- Ensuring all features planned for that release are merged, and any unrelated changes are delayed (as appropriate) until after the release.
+- Creating a Pull Request to update any applicable metadata (such as version information and changelogs) on the `main` branch.
+
+Version Management (Updated)
+----------------------------
+
+Starting from version **0.90.0**, Augur uses a **single source of truth** for its version information,
+defined in `metadata.py`.
+
+Previously, the version number needed to be manually updated in several different places during a release, including:

-When the next release is set to be cut, some preparation steps need to take place first, these include:
-- Ensuring all features planned for that release are merged and any unrelated changes are delayed (as appropriate) until after the release.
-- Creating a Pull Request to update any applicable metadata (such as version information and Changelogs) on the `main` branch.
+- ``pyproject.toml`` (for Python packaging)
+- Dockerfiles (used for building and tagging images)
+- GitHub Actions workflow files (e.g., ``.github/workflows/build_docker.yml``)
+- Any scripts or documentation pages referencing specific versions

-Once all release preparation has been completed, a new Pull Request can be created to merge the main branch into the `release 'branch. This creates a final review opportunity and allows for another run of (potentially more stringent) CI jobs compared to those run on `main`, catching issues that may have come up throughout the various merges or in the process of preparing for release.
+This manual process increased the chance of version mismatches between code, Docker images, and releases.

-After this PR is merged, a tag is created that points to the commit on the `release` branch, effectively labeling it so that it can be returned to later if needed. This labeling process can also be the basis for additional CI jobs that build and upload the released code to distribution platforms such as Docker Hub or the GitHub Container Registry
+Now, this has been **fully centralized**:
+
+- The version number is declared once in ``metadata.py`` as ``__version__``.
+- A helper script ``get_version.py`` reads this value and dynamically injects it into Docker builds via a build argument.
+- The CI/CD pipeline (GitHub Actions) also reads the same version from ``metadata.py`` when tagging builds and Docker images.
+
+This ensures that all parts of Augur — including Python packaging, Docker images, and release artifacts —
+use the **exact same version**, automatically.
+
+Therefore, before tagging a new release, only the version in ``metadata.py`` needs to be updated.
+All other build and deployment steps automatically consume this version during the release process.
+
+Once all release preparation has been completed, a new Pull Request can be created to merge the `main`
+branch into the `release` branch. This creates a final review opportunity and allows for another run of
+(potentially more stringent) CI jobs compared to those run on `main`, catching issues that may have come up
+throughout the various merges or during the process of preparing for release.
+
+After this PR is merged, a tag is created that points to the commit on the `release` branch,
+effectively labeling it so that it can be returned to later if needed. This labeling process can
+also be the basis for additional CI jobs that build and upload the released code to distribution
+platforms such as Docker Hub or the GitHub Container Registry.

 Why?
+----

 This is done to solve a number of problems:
-- having changes moving in two directions at once (i.e. features coming from main, and hotfixes coming from release) was often confusing and increased the odds that a change would be missed, such as being shipped as a hotfix but not merged into the main codebase - leading to a regression in the next release.
+- Having changes moving in two directions at once (i.e. features coming from `main`, and hotfixes coming from `release`)
+  was often confusing and increased the odds that a change would be missed, such as being shipped as a hotfix
+  but not merged into the main codebase — leading to a regression in the next release.
+
+
+Special Case: Hotfixes
+----------------------
+If the fix is a hotfix:

-Special case: Hotfixes
-if the fix was a hotfix:
-- changelog updates and other metadata changes should be included as part of the PR
-- this is where mergeify or something helps re-create the PR targeting the release branch directly. at which point the release process is followed
+- Changelog updates and other metadata changes should be included as part of the PR.
+- This is where tools like **Mergify** can help re-create the PR targeting the `release` branch directly,
+  at which point the regular release process is followed.
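The Version Management section above refers to a ``get_version.py`` helper whose contents are not shown in this diff. As a rough sketch only (the real script may differ), a single-source-of-truth version reader can be as small as:

    # Hypothetical sketch of a get_version.py-style helper; the actual script
    # is not shown in this diff. It reads __version__ from metadata.py without
    # importing the package, then prints it for use as a Docker build argument.
    import pathlib

    scope: dict = {}
    exec(pathlib.Path("metadata.py").read_text(), scope)
    print(scope["__version__"])

A Dockerfile that declares ``ARG VERSION``, as the Dockerfiles in this patch now do, could then be built with something like ``docker build --build-arg VERSION="$(python get_version.py)" .``.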
diff --git a/metadata.py b/metadata.py
index da181f6526..bc38e3fe34 100644
--- a/metadata.py
+++ b/metadata.py
@@ -5,8 +5,8 @@
 __short_description__ = "Python 3 package for free/libre and open-source software community metrics, models & data collection"

-__version__ = "0.90.0"
-__release__ = "v0.90.0 (Trade Deadline)"
+__version__ = "0.90.3"
+__release__ = "v0.90.3 (Trade Deadline)"

 __license__ = "MIT"
 __copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Sean Goggins, Brian Warner & Augurlabs 2025, Red Hat Software"
diff --git a/pyproject.toml b/pyproject.toml
index 6e78b2118f..a3866d86e1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,8 +25,8 @@ dependencies = [
     "bokeh==2.0.2",
     "boto3==1.17.57",
     "bs4==0.0.1",
-    "celery==5.2.7",
-    "click==8.0.3",
+    "celery~=5.5",
+    "click~=8.1",
     "cloudpickle>=0.2.2",
     "coloredlogs==15.0",
     "dask>=2021.6.2",
@@ -99,7 +99,12 @@ dev = [
     "pytest==6.2.5",
     "toml>=0.10.2",
     "ipdb==0.13.9",
-    {include-group = "docs"},
+    "mypy>=1.18.2",
+    "types-requests>=2.31.0.6",
+    "types-pyyaml>=6.0.12.20250915",
+    "types-python-dateutil>=2.9.0.20251008",
+    "types-toml>=0.10.8.20240310",
+    { include-group = "docs" },
 ]
 docs = [
     "docutils==0.20.1",
@@ -149,3 +154,10 @@ constraint-dependencies = [
     # https://docs.python.org/3.10/whatsnew/3.10.html#removed
     "graphql-server-core>1.1.1",
 ]
+
+[tool.mypy]
+files = ['augur/application/db/*.py']
+ignore_missing_imports = true
+follow_imports = "skip"
+disallow_untyped_defs = false
+exclude_gitignore = true
diff --git a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv
index 8967ae2142..fb537d4949 100644
--- a/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv
+++ b/tests/test_workers/test_facade/test_facade_contributor_interface/test_repos.csv
@@ -1,8 +1,8 @@
-10,https://github.com/chaoss/augur.git
-10,https://github.com/chaoss/grimoirelab.git
-20,https://github.com/chaoss/wg-evolution.git
-20,https://github.com/chaoss/wg-risk.git
-20,https://github.com/chaoss/wg-common.git
-20,https://github.com/chaoss/wg-value.git
-20,https://github.com/chaoss/wg-diversity-inclusion.git
-20,https://github.com/chaoss/wg-app-ecosystem.git
\ No newline at end of file
+https://github.com/chaoss/augur.git,10
+https://github.com/chaoss/grimoirelab.git,10
+https://github.com/chaoss/wg-evolution.git,20
+https://github.com/chaoss/wg-risk.git,20
+https://github.com/chaoss/wg-common.git,20
+https://github.com/chaoss/wg-value.git,20
+https://github.com/chaoss/wg-diversity-inclusion.git,20
+https://github.com/chaoss/wg-app-ecosystem.git,20
diff --git a/uv.lock b/uv.lock
index 66352a8f2e..18681966a7 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.10"
 resolution-markers = [
     "python_full_version >= '3.12'",
@@ -218,6 +218,7 @@ dependencies = [
 dev = [
     { name = "docutils" },
     { name = "ipdb" },
+    { name = "mypy" },
     { name = "pytest" },
     { name = "setuptools" },
     { name = "sphinx" },
@@ -226,6 +227,10 @@
     { name = "sphinxcontrib-redoc" },
     { name = "toml" },
     { name = "tox" },
+    { name = "types-python-dateutil" },
+    { name = "types-pyyaml" },
+    { name = "types-requests" },
+    { name = "types-toml" },
 ]
 docs = [
     { name = "docutils" },
@@ -244,8 +249,8 @@ requires-dist = [
     { name = "bokeh", specifier = "==2.0.2" },
     { name = "boto3", specifier = "==1.17.57" },
     { name = "bs4", specifier = "==0.0.1" },
-    { name = "celery", specifier =
"==5.2.7" }, - { name = "click", specifier = "==8.0.3" }, + { name = "celery", specifier = "~=5.5" }, + { name = "click", specifier = "~=8.1" }, { name = "cloudpickle", specifier = ">=0.2.2" }, { name = "coloredlogs", specifier = "==15.0" }, { name = "dask", specifier = ">=2021.6.2" }, @@ -316,6 +321,7 @@ requires-dist = [ dev = [ { name = "docutils", specifier = "==0.20.1" }, { name = "ipdb", specifier = "==0.13.9" }, + { name = "mypy", specifier = ">=1.18.2" }, { name = "pytest", specifier = "==6.2.5" }, { name = "setuptools" }, { name = "sphinx", specifier = "==7.2.6" }, @@ -324,6 +330,10 @@ dev = [ { name = "sphinxcontrib-redoc", specifier = "==1.6.0" }, { name = "toml", specifier = ">=0.10.2" }, { name = "tox", specifier = "==3.24.4" }, + { name = "types-python-dateutil", specifier = ">=2.9.0.20251008" }, + { name = "types-pyyaml", specifier = ">=6.0.12.20250915" }, + { name = "types-requests", specifier = ">=2.31.0.6" }, + { name = "types-toml", specifier = ">=0.10.8.20240310" }, ] docs = [ { name = "docutils", specifier = "==0.20.1" }, @@ -364,11 +374,11 @@ wheels = [ [[package]] name = "billiard" -version = "3.6.4.0" +version = "4.2.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/92/91/40de1901da8ec9eeb7c6a22143ba5d55d8aaa790761ca31342cedcd5c793/billiard-3.6.4.0.tar.gz", hash = "sha256:299de5a8da28a783d51b197d496bef4f1595dd023a93a4f59dde1886ae905547", size = 155303, upload-time = "2021-04-01T09:23:50.092Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b9/6a/1405343016bce8354b29d90aad6b0bf6485b5e60404516e4b9a3a9646cf0/billiard-4.2.2.tar.gz", hash = "sha256:e815017a062b714958463e07ba15981d802dc53d41c5b69d28c5a7c238f8ecf3", size = 155592, upload-time = "2025-09-20T14:44:40.456Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2b/89/0c43de91d4e52eaa7bd748771d417f6ac9e51e66b2f61928c2151bf65878/billiard-3.6.4.0-py3-none-any.whl", hash = "sha256:87103ea78fa6ab4d5c751c4909bcff74617d985de7fa8b672cf8618afd5a875b", size = 89472, upload-time = "2021-04-01T09:23:42.019Z" }, + { url = "https://files.pythonhosted.org/packages/a6/80/ef8dff49aae0e4430f81842f7403e14e0ca59db7bbaf7af41245b67c6b25/billiard-4.2.2-py3-none-any.whl", hash = "sha256:4bc05dcf0d1cc6addef470723aac2a6232f3c7ed7475b0b580473a9145829457", size = 86896, upload-time = "2025-09-20T14:44:39.157Z" }, ] [[package]] @@ -441,7 +451,7 @@ wheels = [ [[package]] name = "celery" -version = "5.2.7" +version = "5.5.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "billiard" }, @@ -450,12 +460,12 @@ dependencies = [ { name = "click-plugins" }, { name = "click-repl" }, { name = "kombu" }, - { name = "pytz" }, + { name = "python-dateutil" }, { name = "vine" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ce/21/41a0028f6d610987c0839250357c1a00f351790b8a448c2eb323caa719ac/celery-5.2.7.tar.gz", hash = "sha256:fafbd82934d30f8a004f81e8f7a062e31413a23d444be8ee3326553915958c6d", size = 1474243, upload-time = "2022-05-29T12:58:03.046Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bb/7d/6c289f407d219ba36d8b384b42489ebdd0c84ce9c413875a8aae0c85f35b/celery-5.5.3.tar.gz", hash = "sha256:6c972ae7968c2b5281227f01c3a3f984037d21c5129d07bf3550cc2afc6b10a5", size = 1667144, upload-time = "2025-06-01T11:08:12.563Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/99/21fe9d1829cab4fc77d18f89d0c4cbcfe754e95f8b8f4af64fe4997c442f/celery-5.2.7-py3-none-any.whl", hash = 
"sha256:138420c020cd58d6707e6257b6beda91fd39af7afde5d36c6334d175302c0e14", size = 405637, upload-time = "2022-05-29T12:57:59.911Z" }, + { url = "https://files.pythonhosted.org/packages/c9/af/0dcccc7fdcdf170f9a1585e5e96b6fb0ba1749ef6be8c89a6202284759bd/celery-5.5.3-py3-none-any.whl", hash = "sha256:0b5761a07057acee94694464ca482416b959568904c9dfa41ce8413a7d65d525", size = 438775, upload-time = "2025-06-01T11:08:09.94Z" }, ] [[package]] @@ -530,14 +540,14 @@ wheels = [ [[package]] name = "click" -version = "8.0.3" +version = "8.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f4/09/ad003f1e3428017d1c3da4ccc9547591703ffea548626f47ec74509c5824/click-8.0.3.tar.gz", hash = "sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b", size = 329034, upload-time = "2021-10-10T18:07:33.001Z" } +sdist = { url = "https://files.pythonhosted.org/packages/46/61/de6cd827efad202d7057d93e0fed9294b96952e188f7384832791c7b2254/click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4", size = 276943, upload-time = "2025-09-18T17:32:23.696Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/58/c8aa6a8e62cc75f39fee1092c45d6b6ba684122697d7ce7d53f64f98a129/click-8.0.3-py3-none-any.whl", hash = "sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3", size = 97516, upload-time = "2021-10-10T18:07:30.752Z" }, + { url = "https://files.pythonhosted.org/packages/db/d3/9dcc0f5797f070ec8edf30fbadfb200e71d9db6b84d211e3b2085a7589a0/click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc", size = 107295, upload-time = "2025-09-18T17:32:22.42Z" }, ] [[package]] @@ -2016,6 +2026,60 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ca/91/7dc28d5e2a11a5ad804cf2b7f7a5fcb1eb5a4966d66a5d2b41aee6376543/msgpack-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:6d489fba546295983abd142812bda76b57e33d0b9f5d5b71c09a583285506f69", size = 72341, upload-time = "2025-06-13T06:52:27.835Z" }, ] +[[package]] +name = "mypy" +version = "1.18.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mypy-extensions" }, + { name = "pathspec" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/77/8f0d0001ffad290cef2f7f216f96c814866248a0b92a722365ed54648e7e/mypy-1.18.2.tar.gz", hash = "sha256:06a398102a5f203d7477b2923dda3634c36727fa5c237d8f859ef90c42a9924b", size = 3448846, upload-time = "2025-09-19T00:11:10.519Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/6f/657961a0743cff32e6c0611b63ff1c1970a0b482ace35b069203bf705187/mypy-1.18.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eab0cf6294dafe397c261a75f96dc2c31bffe3b944faa24db5def4e2b0f77c", size = 12807973, upload-time = "2025-09-19T00:10:35.282Z" }, + { url = "https://files.pythonhosted.org/packages/10/e9/420822d4f661f13ca8900f5fa239b40ee3be8b62b32f3357df9a3045a08b/mypy-1.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a780ca61fc239e4865968ebc5240bb3bf610ef59ac398de9a7421b54e4a207e", size = 11896527, upload-time = "2025-09-19T00:10:55.791Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/73/a05b2bbaa7005f4642fcfe40fb73f2b4fb6bb44229bd585b5878e9a87ef8/mypy-1.18.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448acd386266989ef11662ce3c8011fd2a7b632e0ec7d61a98edd8e27472225b", size = 12507004, upload-time = "2025-09-19T00:11:05.411Z" }, + { url = "https://files.pythonhosted.org/packages/4f/01/f6e4b9f0d031c11ccbd6f17da26564f3a0f3c4155af344006434b0a05a9d/mypy-1.18.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f9e171c465ad3901dc652643ee4bffa8e9fef4d7d0eece23b428908c77a76a66", size = 13245947, upload-time = "2025-09-19T00:10:46.923Z" }, + { url = "https://files.pythonhosted.org/packages/d7/97/19727e7499bfa1ae0773d06afd30ac66a58ed7437d940c70548634b24185/mypy-1.18.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:592ec214750bc00741af1f80cbf96b5013d81486b7bb24cb052382c19e40b428", size = 13499217, upload-time = "2025-09-19T00:09:39.472Z" }, + { url = "https://files.pythonhosted.org/packages/9f/4f/90dc8c15c1441bf31cf0f9918bb077e452618708199e530f4cbd5cede6ff/mypy-1.18.2-cp310-cp310-win_amd64.whl", hash = "sha256:7fb95f97199ea11769ebe3638c29b550b5221e997c63b14ef93d2e971606ebed", size = 9766753, upload-time = "2025-09-19T00:10:49.161Z" }, + { url = "https://files.pythonhosted.org/packages/88/87/cafd3ae563f88f94eec33f35ff722d043e09832ea8530ef149ec1efbaf08/mypy-1.18.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:807d9315ab9d464125aa9fcf6d84fde6e1dc67da0b6f80e7405506b8ac72bc7f", size = 12731198, upload-time = "2025-09-19T00:09:44.857Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e0/1e96c3d4266a06d4b0197ace5356d67d937d8358e2ee3ffac71faa843724/mypy-1.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:776bb00de1778caf4db739c6e83919c1d85a448f71979b6a0edd774ea8399341", size = 11817879, upload-time = "2025-09-19T00:09:47.131Z" }, + { url = "https://files.pythonhosted.org/packages/72/ef/0c9ba89eb03453e76bdac5a78b08260a848c7bfc5d6603634774d9cd9525/mypy-1.18.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1379451880512ffce14505493bd9fe469e0697543717298242574882cf8cdb8d", size = 12427292, upload-time = "2025-09-19T00:10:22.472Z" }, + { url = "https://files.pythonhosted.org/packages/1a/52/ec4a061dd599eb8179d5411d99775bec2a20542505988f40fc2fee781068/mypy-1.18.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1331eb7fd110d60c24999893320967594ff84c38ac6d19e0a76c5fd809a84c86", size = 13163750, upload-time = "2025-09-19T00:09:51.472Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5f/2cf2ceb3b36372d51568f2208c021870fe7834cf3186b653ac6446511839/mypy-1.18.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3ca30b50a51e7ba93b00422e486cbb124f1c56a535e20eff7b2d6ab72b3b2e37", size = 13351827, upload-time = "2025-09-19T00:09:58.311Z" }, + { url = "https://files.pythonhosted.org/packages/c8/7d/2697b930179e7277529eaaec1513f8de622818696857f689e4a5432e5e27/mypy-1.18.2-cp311-cp311-win_amd64.whl", hash = "sha256:664dc726e67fa54e14536f6e1224bcfce1d9e5ac02426d2326e2bb4e081d1ce8", size = 9757983, upload-time = "2025-09-19T00:10:09.071Z" }, + { url = "https://files.pythonhosted.org/packages/07/06/dfdd2bc60c66611dd8335f463818514733bc763e4760dee289dcc33df709/mypy-1.18.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:33eca32dd124b29400c31d7cf784e795b050ace0e1f91b8dc035672725617e34", size = 12908273, upload-time = 
"2025-09-19T00:10:58.321Z" }, + { url = "https://files.pythonhosted.org/packages/81/14/6a9de6d13a122d5608e1a04130724caf9170333ac5a924e10f670687d3eb/mypy-1.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a3c47adf30d65e89b2dcd2fa32f3aeb5e94ca970d2c15fcb25e297871c8e4764", size = 11920910, upload-time = "2025-09-19T00:10:20.043Z" }, + { url = "https://files.pythonhosted.org/packages/5f/a9/b29de53e42f18e8cc547e38daa9dfa132ffdc64f7250e353f5c8cdd44bee/mypy-1.18.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d6c838e831a062f5f29d11c9057c6009f60cb294fea33a98422688181fe2893", size = 12465585, upload-time = "2025-09-19T00:10:33.005Z" }, + { url = "https://files.pythonhosted.org/packages/77/ae/6c3d2c7c61ff21f2bee938c917616c92ebf852f015fb55917fd6e2811db2/mypy-1.18.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01199871b6110a2ce984bde85acd481232d17413868c9807e95c1b0739a58914", size = 13348562, upload-time = "2025-09-19T00:10:11.51Z" }, + { url = "https://files.pythonhosted.org/packages/4d/31/aec68ab3b4aebdf8f36d191b0685d99faa899ab990753ca0fee60fb99511/mypy-1.18.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a2afc0fa0b0e91b4599ddfe0f91e2c26c2b5a5ab263737e998d6817874c5f7c8", size = 13533296, upload-time = "2025-09-19T00:10:06.568Z" }, + { url = "https://files.pythonhosted.org/packages/9f/83/abcb3ad9478fca3ebeb6a5358bb0b22c95ea42b43b7789c7fb1297ca44f4/mypy-1.18.2-cp312-cp312-win_amd64.whl", hash = "sha256:d8068d0afe682c7c4897c0f7ce84ea77f6de953262b12d07038f4d296d547074", size = 9828828, upload-time = "2025-09-19T00:10:28.203Z" }, + { url = "https://files.pythonhosted.org/packages/5f/04/7f462e6fbba87a72bc8097b93f6842499c428a6ff0c81dd46948d175afe8/mypy-1.18.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:07b8b0f580ca6d289e69209ec9d3911b4a26e5abfde32228a288eb79df129fcc", size = 12898728, upload-time = "2025-09-19T00:10:01.33Z" }, + { url = "https://files.pythonhosted.org/packages/99/5b/61ed4efb64f1871b41fd0b82d29a64640f3516078f6c7905b68ab1ad8b13/mypy-1.18.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed4482847168439651d3feee5833ccedbf6657e964572706a2adb1f7fa4dfe2e", size = 11910758, upload-time = "2025-09-19T00:10:42.607Z" }, + { url = "https://files.pythonhosted.org/packages/3c/46/d297d4b683cc89a6e4108c4250a6a6b717f5fa96e1a30a7944a6da44da35/mypy-1.18.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ad2afadd1e9fea5cf99a45a822346971ede8685cc581ed9cd4d42eaf940986", size = 12475342, upload-time = "2025-09-19T00:11:00.371Z" }, + { url = "https://files.pythonhosted.org/packages/83/45/4798f4d00df13eae3bfdf726c9244bcb495ab5bd588c0eed93a2f2dd67f3/mypy-1.18.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a431a6f1ef14cf8c144c6b14793a23ec4eae3db28277c358136e79d7d062f62d", size = 13338709, upload-time = "2025-09-19T00:11:03.358Z" }, + { url = "https://files.pythonhosted.org/packages/d7/09/479f7358d9625172521a87a9271ddd2441e1dab16a09708f056e97007207/mypy-1.18.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7ab28cc197f1dd77a67e1c6f35cd1f8e8b73ed2217e4fc005f9e6a504e46e7ba", size = 13529806, upload-time = "2025-09-19T00:10:26.073Z" }, + { url = "https://files.pythonhosted.org/packages/71/cf/ac0f2c7e9d0ea3c75cd99dff7aec1c9df4a1376537cb90e4c882267ee7e9/mypy-1.18.2-cp313-cp313-win_amd64.whl", hash = "sha256:0e2785a84b34a72ba55fb5daf079a1003a34c05b22238da94fcae2bbe46f3544", 
size = 9833262, upload-time = "2025-09-19T00:10:40.035Z" }, + { url = "https://files.pythonhosted.org/packages/5a/0c/7d5300883da16f0063ae53996358758b2a2df2a09c72a5061fa79a1f5006/mypy-1.18.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:62f0e1e988ad41c2a110edde6c398383a889d95b36b3e60bcf155f5164c4fdce", size = 12893775, upload-time = "2025-09-19T00:10:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/50/df/2cffbf25737bdb236f60c973edf62e3e7b4ee1c25b6878629e88e2cde967/mypy-1.18.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8795a039bab805ff0c1dfdb8cd3344642c2b99b8e439d057aba30850b8d3423d", size = 11936852, upload-time = "2025-09-19T00:10:51.631Z" }, + { url = "https://files.pythonhosted.org/packages/be/50/34059de13dd269227fb4a03be1faee6e2a4b04a2051c82ac0a0b5a773c9a/mypy-1.18.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ca1e64b24a700ab5ce10133f7ccd956a04715463d30498e64ea8715236f9c9c", size = 12480242, upload-time = "2025-09-19T00:11:07.955Z" }, + { url = "https://files.pythonhosted.org/packages/5b/11/040983fad5132d85914c874a2836252bbc57832065548885b5bb5b0d4359/mypy-1.18.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d924eef3795cc89fecf6bedc6ed32b33ac13e8321344f6ddbf8ee89f706c05cb", size = 13326683, upload-time = "2025-09-19T00:09:55.572Z" }, + { url = "https://files.pythonhosted.org/packages/e9/ba/89b2901dd77414dd7a8c8729985832a5735053be15b744c18e4586e506ef/mypy-1.18.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:20c02215a080e3a2be3aa50506c67242df1c151eaba0dcbc1e4e557922a26075", size = 13514749, upload-time = "2025-09-19T00:10:44.827Z" }, + { url = "https://files.pythonhosted.org/packages/25/bc/cc98767cffd6b2928ba680f3e5bc969c4152bf7c2d83f92f5a504b92b0eb/mypy-1.18.2-cp314-cp314-win_amd64.whl", hash = "sha256:749b5f83198f1ca64345603118a6f01a4e99ad4bf9d103ddc5a3200cc4614adf", size = 9982959, upload-time = "2025-09-19T00:10:37.344Z" }, + { url = "https://files.pythonhosted.org/packages/87/e3/be76d87158ebafa0309946c4a73831974d4d6ab4f4ef40c3b53a385a66fd/mypy-1.18.2-py3-none-any.whl", hash = "sha256:22a1748707dd62b58d2ae53562ffc4d7f8bcc727e8ac7cbc69c053ddc874d47e", size = 2352367, upload-time = "2025-09-19T00:10:15.489Z" }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "networkx" version = "3.4.2" @@ -2168,6 +2232,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/e7/40fb618334dcdf7c5a316c0e7343c5cd82d3d866edc100d98e29bc945ecd/partd-1.4.2-py3-none-any.whl", hash = "sha256:978e4ac767ec4ba5b86c6eaa52e5a2a3bc748a2ca839e8cc798f1cc6ce6efb0f", size = 18905, upload-time = "2024-05-06T19:51:39.271Z" }, ] +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, +] + [[package]] name = "pexpect" version = "4.9.0" @@ -3728,6 +3801,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, ] +[[package]] +name = "types-python-dateutil" +version = "2.9.0.20251008" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/83/24ed25dd0c6277a1a170c180ad9eef5879ecc9a4745b58d7905a4588c80d/types_python_dateutil-2.9.0.20251008.tar.gz", hash = "sha256:c3826289c170c93ebd8360c3485311187df740166dbab9dd3b792e69f2bc1f9c", size = 16128, upload-time = "2025-10-08T02:51:34.93Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/af/5d24b8d49ef358468ecfdff5c556adf37f4fd28e336b96f923661a808329/types_python_dateutil-2.9.0.20251008-py3-none-any.whl", hash = "sha256:b9a5232c8921cf7661b29c163ccc56055c418ab2c6eabe8f917cbcc73a4c4157", size = 17934, upload-time = "2025-10-08T02:51:33.55Z" }, +] + +[[package]] +name = "types-pyyaml" +version = "6.0.12.20250915" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/69/3c51b36d04da19b92f9e815be12753125bd8bc247ba0470a982e6979e71c/types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3", size = 17522, upload-time = "2025-09-15T03:01:00.728Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/e0/1eed384f02555dde685fff1a1ac805c1c7dcb6dd019c916fe659b1c1f9ec/types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6", size = 20338, upload-time = "2025-09-15T03:00:59.218Z" }, +] + +[[package]] +name = "types-requests" +version = "2.31.0.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "types-urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f9/b8/c1e8d39996b4929b918aba10dba5de07a8b3f4c8487bb61bb79882544e69/types-requests-2.31.0.6.tar.gz", hash = "sha256:cd74ce3b53c461f1228a9b783929ac73a666658f223e28ed29753771477b3bd0", size = 15535, upload-time = "2023-09-27T06:19:38.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/a1/6f8dc74d9069e790d604ddae70cb46dcbac668f1bb08136e7b0f2f5cd3bf/types_requests-2.31.0.6-py3-none-any.whl", hash = "sha256:a2db9cb228a81da8348b49ad6db3f5519452dd20a9c1e1a868c83c5fe88fd1a9", size = 14516, upload-time = "2023-09-27T06:19:36.373Z" }, +] + +[[package]] +name = "types-toml" +version = "0.10.8.20240310" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/86/47/3e4c75042792bff8e90d7991aa5c51812cc668828cc6cce711e97f63a607/types-toml-0.10.8.20240310.tar.gz", hash = 
"sha256:3d41501302972436a6b8b239c850b26689657e25281b48ff0ec06345b8830331", size = 4392, upload-time = "2024-03-10T02:18:37.518Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/a2/d32ab58c0b216912638b140ab2170ee4b8644067c293b170e19fba340ccc/types_toml-0.10.8.20240310-py3-none-any.whl", hash = "sha256:627b47775d25fa29977d9c70dc0cbab3f314f32c8d8d0c012f2ef5de7aaec05d", size = 4777, upload-time = "2024-03-10T02:18:36.568Z" }, +] + +[[package]] +name = "types-urllib3" +version = "1.26.25.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/73/de/b9d7a68ad39092368fb21dd6194b362b98a1daeea5dcfef5e1adb5031c7e/types-urllib3-1.26.25.14.tar.gz", hash = "sha256:229b7f577c951b8c1b92c1bc2b2fdb0b49847bd2af6d1cc2a2e3dd340f3bda8f", size = 11239, upload-time = "2023-07-20T15:19:31.307Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/7b/3fc711b2efea5e85a7a0bbfe269ea944aa767bbba5ec52f9ee45d362ccf3/types_urllib3-1.26.25.14-py3-none-any.whl", hash = "sha256:9683bbb7fb72e32bfe9d2be6e04875fbe1b3eeec3cbb4ea231435aa7fd6b4f0e", size = 15377, upload-time = "2023-07-20T15:19:30.379Z" }, +] + [[package]] name = "typing-extensions" version = "4.7.1"