diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index b29ab2ed89..fbce968697 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -11,45 +11,68 @@ on: release: types: - published + workflow_dispatch: permissions: {} jobs: build: + name: Build image permissions: contents: read # to fetch code (actions/checkout) packages: write # to push docker image - - name: Build image + strategy: + matrix: + image: + - backend + - database + - rabbitmq runs-on: ubuntu-latest steps: - - name: Checkout main - uses: actions/checkout@v2 - - name: Run the build - run: | - set -ex - # use that here since the variable are not present before start, so can't be in env - export LOGIN=$GITHUB_REPOSITORY_OWNER + - name: Checkout repository + uses: actions/checkout@v4 - echo $PASSWORD | docker login $REGISTRY -u $LOGIN --password-stdin + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + id: setup-buildx - for i in docker/* ; do - CONTAINER=$(basename $i) - echo "Building $CONTAINER" - export IMAGE=$LOGIN/augur_$CONTAINER - DOCKERFILE=${i}/Dockerfile + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + if: github.event_name != 'pull_request' + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} - docker build . 
-f $DOCKERFILE --tag $REGISTRY/$IMAGE:latest - if [[ $GITHUB_EVENT_NAME == 'release' ]]; then - TAG=$(basename $GITHUB_REF) - docker tag $REGISTRY/$IMAGE:latest $REGISTRY/$IMAGE:$TAG - docker push $REGISTRY/$IMAGE:latest - docker push $REGISTRY/$IMAGE:$TAG - elif [[ $GITHUB_EVENT_NAME == 'push' ]]; then - docker tag $REGISTRY/$IMAGE:latest $REGISTRY/$IMAGE:devel-latest - docker push $REGISTRY/$IMAGE:devel-latest - fi - done + - name: Set container metadata + uses: docker/metadata-action@v5 + id: meta env: - REGISTRY: ghcr.io - PASSWORD: ${{ secrets.GITHUB_TOKEN }} + DOCKER_METADATA_ANNOTATIONS_LEVELS: index,manifest + with: + annotations: | + org.opencontainers.image.title=augur_${{ matrix.image}} + labels: | + org.opencontainers.image.title=augur_${{ matrix.image}} + images: ghcr.io/${{ github.repository_owner }}/augur_${{ matrix.image }} + # Pushes to the dev branch update the *:devel-latest tag + # Releases update the *:latest tag and the *:<version> tag (the release's tag_name) + tags: | + type=raw,value=devel-latest,enable=${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.ref == 'refs/heads/dev' }} + type=raw,value=latest,enable=${{ github.event_name == 'release' }} + type=raw,value=${{ github.event.release.tag_name }},enable=${{ github.event_name == 'release' }} + + - name: Build and push + id: push + uses: docker/build-push-action@v6 + with: + annotations: ${{ steps.meta.outputs.annotations }} + context: .
+ file: ./docker/${{ matrix.image }}/Dockerfile + labels: ${{ steps.meta.outputs.labels }} + platforms: linux/amd64 + # Only push if we've tagged the image in the metadata step + push: ${{ github.event_name != 'pull_request' && steps.meta.outputs.tags != '' }} + tags: ${{ steps.meta.outputs.tags }} + cache-from: type=gha,scope=container-${{ matrix.image }} + cache-to: type=gha,scope=container-${{ matrix.image }},mode=max diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml deleted file mode 100644 index 3f19c12343..0000000000 --- a/.github/workflows/docker-image.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: Docker Image CI - -on: - push: - branches: [ "main" ] - pull_request: - branches: [ "main" ] - -jobs: - - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Build the Docker image - run: | - ls -l - docker build . --file docker/backend/Dockerfile --tag my-image-name:$(date +%s) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6368d58bf5..6fed03731b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -92,7 +92,7 @@ git push origin master ### Augur - [Stable documentation (`main` branch)](https://oss-augur.readthedocs.io/en/main/) - [Nightly/developer build documentation (`dev` branch)](https://oss-augur.readthedocs.io/en/dev/) (warning: this is should be considered an unstable branch and should not be used for production) -- [Live Augur demo](http://zephyr.osshealth.io/) +- [Live Augur demo](https://ai.chaoss.io) ### CHAOSS - [Website](https://chaoss.community/) diff --git a/README.md b/README.md index f65deb7535..33e41bcf15 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Augur NEW Release v0.81.0 +# Augur NEW Release v0.86.0 Augur is primarily a data engineering tool that makes it possible for data scientists to gather open source software community data - less data carpentry for everyone else! 
The primary way of looking at Augur data is through [8Knot](https://github.com/oss-aspen/8knot), a public instance of 8Knot is available [here](https://metrix.chaoss.io) - this is tied to a public instance of [Augur](https://ai.chaoss.io). @@ -11,8 +11,7 @@ We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy o ## NEW RELEASE ALERT! **If you want to jump right in, the updated docker, docker-compose and bare metal installation instructions are available [here](docs/new-install.md)**. -<<<<<<< HEAD -Augur is now releasing a dramatically improved new version to the ```main``` branch. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.81.0). +Augur is now releasing a dramatically improved new version to the ```main``` branch. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.86.0). - The `main` branch is a stable version of our new architecture, which features: @@ -44,9 +43,9 @@ For more information on [how to get involved on the CHAOSS website](https://chao ## Collecting Data -Augur supports ```Python3.6``` through ```Python3.9``` on all platforms. ```Python3.10``` and above do not yet work because of machine learning worker dependencies. On OSX, you can create a ```Python3.9``` environment, by running: +Augur supports ```Python3.7``` through ```Python3.11``` on all platforms. ```Python3.12``` and above do not yet work because of machine learning worker dependencies. On OSX, you can create a ```Python3.11``` environment, by running: ``` -$ python3.9 -m venv path/to/venv +$ python3.11 -m venv path/to/venv ``` Augur's main focus is to measure the overall health and sustainability of open source projects. @@ -84,7 +83,7 @@ We strongly believe that much of what makes open source so great is the incredib ## License, Copyright, and Funding -Copyright © 2023 University of Nebraska at Omaha, University of Missouri, Brian Warner, and the CHAOSS Project. 
+Copyright © 2025 University of Nebraska at Omaha, University of Missouri, Brian Warner, and the CHAOSS Project. Augur is free software: you can redistribute it and/or modify it under the terms of the MIT License as published by the Open Source Initiative. See the [LICENSE](LICENSE) file for more details. diff --git a/Vagrantfile b/Vagrantfile deleted file mode 100644 index 95de27ce02..0000000000 --- a/Vagrantfile +++ /dev/null @@ -1,108 +0,0 @@ -$script = <<-'SCRIPT' -set -euxo pipefail - -sudo apt-get -y update -sudo apt-get -y install --no-install-recommends \ - build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libsqlite3-dev libreadline-dev libffi-dev curl libbz2-dev \ - git gcc gfortran \ - python3 python3-pip python3.8-venv \ - postgresql postgresql-contrib \ - libomp-dev \ - golang libgomp1 -sudo pg_ctlcluster 12 main start - -go get -u github.com/boyter/scc/ - -# # install Go -# installGo() ( -# cd "$(mktemp -d)" -# wget https://golang.org/dl/go1.16.5.linux-amd64.tar.gz -# rm -rf /usr/local/go && tar -C /usr/local -xzf go1.16.5.linux-amd64.tar.gz -# ) -# sudo installGo -# export PATH=$PATH:/usr/local/go/bin - - -########################################################################################## -# see: https://oss-augur.readthedocs.io/en/master/getting-started/database.html -cat < /tmp/init.psql -CREATE DATABASE augur; -CREATE USER augur WITH ENCRYPTED PASSWORD 'password'; -GRANT ALL PRIVILEGES ON DATABASE augur TO augur; -EOF -sudo -u postgres psql -U postgres -f /tmp/init.psql - - -########################################################################################## -# see: https://oss-augur.readthedocs.io/en/master/getting-started/installation.html - -mkdir -p "$HOME/augur/" "$HOME/augur/logs/" "$HOME/augur/repos/" -cat < "$HOME/augur/config.json" -{ - "Database": { - "host": "localhost", - "password": "password" - }, - "Server": { - "host": "0.0.0.0" - }, - "Logging": { - "logs_directory": "$HOME/augur/logs/", - 
"log_level": "INFO", - "verbose": 0, - "quiet": 0, - "debug": 1 - }, - "Workers": { - "facade_worker": { - "repo_directory": "$HOME/augur/repos/", - "switch": 1 - }, - "github_worker": { - "switch": 1 - }, - "insight_worker": { - "switch": 1 - }, - "linux_badge_worker": { - "switch": 1 - }, - "pull_request_worker": { - "switch": 1 - }, - "repo_info_worker": { - "switch": 1 - }, - "release_worker": { - "switch": 1 - } - } -} -EOF - - -python3 -m venv $HOME/.virtualenvs/augur_env -source $HOME/.virtualenvs/augur_env/bin/activate -pip install wheel - -cd /vagrant -python setup.py bdist_wheel -make clean -make install-dev - -augur config init --rc-config-file "$HOME/config.json" -augur db create-schema -augur backend start" - -SCRIPT - -Vagrant.configure("2") do |config| - config.vm.box = "ubuntu/focal64" - - config.vm.provider "virtualbox" do |v| - v.memory = 20480 - v.cpus = 4 - end - - config.vm.provision "shell", privileged: false, inline: $script -end diff --git a/augur/application/cli/__init__.py b/augur/application/cli/__init__.py index 00f41a5534..e68af307bb 100644 --- a/augur/application/cli/__init__.py +++ b/augur/application/cli/__init__.py @@ -6,6 +6,7 @@ import re import json import httpx +import traceback from augur.application.db.engine import DatabaseEngine from augur.application.db import get_engine, dispose_database_engine @@ -16,23 +17,32 @@ def test_connection(function_internet_connection): @click.pass_context def new_func(ctx, *args, **kwargs): usage = re.search(r"Usage:\s(.*)\s\[OPTIONS\]", str(ctx.get_usage())).groups()[0] + success = False with httpx.Client() as client: try: _ = client.request( method="GET", url="http://chaoss.community", timeout=10, follow_redirects=True) - - return ctx.invoke(function_internet_connection, *args, **kwargs) + success = True except (TimeoutError, httpx.TimeoutException): print("Request timed out.") - except httpx.NetworkError: + except httpx.NetworkError as e: print(f"Network Error: {httpx.NetworkError}") - 
except httpx.ProtocolError: + print(traceback.format_exc()) + except httpx.ProtocolError as e: print(f"Protocol Error: {httpx.ProtocolError}") - print(f"\n\n{usage} command setup failed\n \ - You are not connected to the internet.\n \ - Please connect to the internet to run Augur\n \ - Consider setting http_proxy variables for limited access installations.") - sys.exit(-1) + print(traceback.format_exc()) + + if not success: + print( + f""" + \n\n{usage} command setup failed. + There was an error while testing for network connectivity + Please check your connection to the internet to run Augur + Consider setting http_proxy variables for limited access installations.""" + ) + sys.exit(-1) + + return ctx.invoke(function_internet_connection, *args, **kwargs) return update_wrapper(new_func, function_internet_connection) diff --git a/augur/application/cli/backend.py b/augur/application/cli/backend.py index 2b5ec69042..69f93aa6ea 100644 --- a/augur/application/cli/backend.py +++ b/augur/application/cli/backend.py @@ -86,6 +86,9 @@ def start(ctx, disable_collection, development, pidfile, port): worker_vmem_cap = get_value("Celery", 'worker_process_vmem_cap') + # clean up rabbit messages on startup so the queues are clean even if the previous shutdown failed + cleanup_collection_status_and_rabbit(logger, ctx.obj.engine) + gunicorn_command = f"gunicorn -c {gunicorn_location} -b {host}:{port} augur.api.server:app --log-file gunicorn.log" server = subprocess.Popen(gunicorn_command.split(" ")) @@ -180,7 +183,7 @@ def start(ctx, disable_collection, development, pidfile, port): try: keypub.shutdown() - cleanup_after_collection_halt(logger, ctx.obj.engine) + cleanup_collection_status_and_rabbit(logger, ctx.obj.engine) except RedisConnectionError: pass @@ -302,7 +305,7 @@ def stop_collection(ctx): logger.info(f"Waiting on [{', '.join(str(p.pid for p in alive))}]") time.sleep(0.5) - cleanup_after_collection_halt(logger, ctx.obj.engine) + cleanup_collection_status_and_rabbit(logger, ctx.obj.engine)
@cli.command('kill') @test_connection @@ -330,10 +333,10 @@ def augur_stop(signal, logger, engine): _broadcast_signal_to_processes(augur_processes, broadcast_signal=signal, given_logger=logger) if "celery" in process_names: - cleanup_after_collection_halt(logger, engine) + cleanup_collection_status_and_rabbit(logger, engine) -def cleanup_after_collection_halt(logger, engine): +def cleanup_collection_status_and_rabbit(logger, engine): clear_redis_caches() connection_string = get_value("RabbitMQ", "connection_string") diff --git a/augur/application/cli/github.py b/augur/application/cli/github.py index cad13be793..4896bf05fb 100644 --- a/augur/application/cli/github.py +++ b/augur/application/cli/github.py @@ -32,7 +32,7 @@ def update_api_key(): """ SELECT value as github_key from config Where section_name='Keys' AND setting_name='github_api_key' UNION All - SELECT access_token as github_key from worker_oauth ORDER BY github_key DESC; + SELECT access_token as github_key from worker_oauth where platform='github' ORDER BY github_key DESC; """ ) diff --git a/augur/application/db/data_parse.py b/augur/application/db/data_parse.py index 71bde0aa21..6f20fa35a7 100644 --- a/augur/application/db/data_parse.py +++ b/augur/application/db/data_parse.py @@ -291,7 +291,7 @@ def extract_pr_event_data(event: dict, pr_id: int, gh_src_id: int, platform_id: 'action': event['event'], 'action_commit_hash': None, 'created_at': event['created_at'], - 'issue_event_src_id': gh_src_id, + 'issue_event_src_id': event["id"], 'node_id': event['node_id'], 'node_url': event['url'], 'tool_source': tool_source, diff --git a/augur/application/schema/alembic/versions/31_update_pr_events_unique.py b/augur/application/schema/alembic/versions/31_update_pr_events_unique.py new file mode 100644 index 0000000000..b55b60a09a --- /dev/null +++ b/augur/application/schema/alembic/versions/31_update_pr_events_unique.py @@ -0,0 +1,83 @@ +"""Update pr events unique + +Revision ID: 31 +Revises: 30 +Create Date: 
2025-03-08 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy import text +from augur.application.db import create_database_engine, get_database_string + + +# revision identifiers, used by Alembic. +revision = '31' +down_revision = '30' +branch_labels = None +depends_on = None + + + # conn = op.get_bind() + # conn.execute(text(""" + # UPDATE pull_request_events + # SET issue_event_src_id = substring(node_url FROM '.*/([0-9]+)$')::BIGINT; + # """)) + + +def upgrade(): + + connection_string = get_database_string() + engine = create_database_engine(connection_string) + + with engine.connect() as conn: + + result = conn.execute(text("SELECT COUNT(*) FROM pull_request_events WHERE issue_event_src_id=pr_platform_event_id")) + total_rows = result.scalar() + if total_rows != 0: + print(f"Rows needing updated: {total_rows}") + print(f"0.0% complete") + total_updated = 0 + + while True: + result = conn.execute(text(""" + WITH cte AS ( + SELECT pr_event_id + FROM pull_request_events + WHERE issue_event_src_id=pr_platform_event_id + LIMIT 250000 + ) + UPDATE pull_request_events + SET issue_event_src_id = substring(node_url FROM '.*/([0-9]+)$')::BIGINT + FROM cte + WHERE pull_request_events.pr_event_id = cte.pr_event_id + RETURNING 1; + """)) + + conn.commit() + + rows_updated = result.rowcount + total_updated += rows_updated + + if rows_updated == 0: + print(f"Update complete") + break + + percentage_updated = (total_updated / total_rows) * 100 + + print(f"{percentage_updated:.1f}% complete ({total_rows-total_updated} rows left)") + + print("Creating (repo_id, issue_event_src_id) index") + op.create_unique_constraint('pr_events_repo_id_event_src_id_unique', 'pull_request_events', ['repo_id', 'issue_event_src_id'], schema='augur_data') + + +def downgrade(): + op.drop_constraint('pr_events_repo_id_event_src_id_unique', 'pull_request_events', schema='augur_data', type_='unique') + + print("Please run in background. 
This downgrade will take a very *very* long time") + conn = op.get_bind() + conn.execute(text(""" + UPDATE pull_request_events + SET issue_event_src_id = pr_platform_event_id + WHERE issue_event_src_id <> pr_platform_event_id; + """)) \ No newline at end of file diff --git a/augur/tasks/data_analysis/clustering_worker/setup.py b/augur/tasks/data_analysis/clustering_worker/setup.py index a197b21568..bd591554c3 100644 --- a/augur/tasks/data_analysis/clustering_worker/setup.py +++ b/augur/tasks/data_analysis/clustering_worker/setup.py @@ -26,7 +26,7 @@ def read(filename): 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.32.0', - 'psycopg2-binary==2.9.3', + 'psycopg2-binary==2.9.9', #'sklearn==0.0.0', 'scikit-learn==1.5.0', 'numpy==1.26.0', diff --git a/augur/tasks/data_analysis/contributor_breadth_worker/setup.py b/augur/tasks/data_analysis/contributor_breadth_worker/setup.py index 805edfb36b..70a4cd6312 100644 --- a/augur/tasks/data_analysis/contributor_breadth_worker/setup.py +++ b/augur/tasks/data_analysis/contributor_breadth_worker/setup.py @@ -27,7 +27,7 @@ def read(filename): 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.32.0', - 'psycopg2-binary==2.9.3' + 'psycopg2-binary==2.9.9' ], entry_points={ 'console_scripts': [ diff --git a/augur/tasks/data_analysis/discourse_analysis/setup.py b/augur/tasks/data_analysis/discourse_analysis/setup.py index ca936a6000..67cb4dd7e7 100644 --- a/augur/tasks/data_analysis/discourse_analysis/setup.py +++ b/augur/tasks/data_analysis/discourse_analysis/setup.py @@ -26,7 +26,7 @@ def read(filename): 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.32.0', - 'psycopg2-binary==2.9.3', + 'psycopg2-binary==2.9.9', 'click==8.0.3', 'scipy>=1.10.0', 'nltk==3.6.6', diff --git a/augur/tasks/data_analysis/insight_worker/setup.py b/augur/tasks/data_analysis/insight_worker/setup.py index 92d663e3ae..b28b4bf5ab 100644 --- a/augur/tasks/data_analysis/insight_worker/setup.py +++ 
b/augur/tasks/data_analysis/insight_worker/setup.py @@ -27,7 +27,7 @@ def read(filename): 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.32.0', - 'psycopg2-binary==2.9.3', + 'psycopg2-binary==2.9.9', 'click==8.0.3', 'scipy>=1.10.0', 'sklearn==0.0', diff --git a/augur/tasks/data_analysis/message_insights/setup.py b/augur/tasks/data_analysis/message_insights/setup.py index 2f86701619..bd136fdfec 100644 --- a/augur/tasks/data_analysis/message_insights/setup.py +++ b/augur/tasks/data_analysis/message_insights/setup.py @@ -28,7 +28,7 @@ def read(filename): 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.32.0', - 'psycopg2-binary==2.9.3', + 'psycopg2-binary==2.9.9', 'click==8.0.3', 'scipy>=1.10.0', 'scikit-learn==1.5.0', #0.24.2', diff --git a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py index 63ccbec1de..391b368f32 100644 --- a/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py +++ b/augur/tasks/data_analysis/pull_request_analysis_worker/setup.py @@ -26,7 +26,7 @@ def read(filename): 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.32.0', - 'psycopg2-binary==2.9.3', + 'psycopg2-binary==2.9.9', 'sklearn==0.0', 'nltk==3.6.6', 'numpy==1.26.0', diff --git a/augur/tasks/frontend.py b/augur/tasks/frontend.py index f526d9041d..cd2d3e32ba 100644 --- a/augur/tasks/frontend.py +++ b/augur/tasks/frontend.py @@ -9,7 +9,7 @@ from augur.tasks.github.util.github_graphql_data_access import GithubGraphQlDataAccess from augur.application.db.lib import get_group_by_name, get_repo_by_repo_git, get_github_repo_by_src_id, get_gitlab_repo_by_src_id from augur.tasks.github.util.util import get_owner_repo -from augur.application.db.models.augur_operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME, RepoGroup +from augur.application.db.models.augur_operations import retrieve_owner_repos, FRONTEND_REPO_GROUP_NAME, RepoGroup, CollectionStatus from 
augur.tasks.github.util.github_paginator import hit_api from augur.application.db.models import UserRepo, Repo @@ -235,6 +235,8 @@ def add_github_repo(logger, session, url, repo_group_id, group_id, repo_type, re logger.error(f"Error while adding repo: Failed to insert user repo record. A record with a repo_id of {repo_id} and a group id of {group_id} needs to be added to the user repo table so that this repo shows up in the users group") return + CollectionStatus.insert(session, logger, repo_id) + def get_gitlab_repo_data(gl_session, url: str, logger) -> bool: @@ -281,6 +283,8 @@ def add_gitlab_repo(logger, session, url, repo_group_id, group_id, repo_src_id): if not result: logger.error(f"Error while adding repo: Failed to insert user repo record. A record with a repo_id of {repo_id} and a group id of {group_id} needs to be added to the user repo table so that this repo shows up in the users group") return + + CollectionStatus.insert(session, logger, repo_id) # @celery.task # def add_org_repo_list(user_id, group_name, urls): diff --git a/augur/tasks/git/util/facade_worker/facade_worker/analyzecommit.py b/augur/tasks/git/util/facade_worker/facade_worker/analyzecommit.py index 1e2e04ac12..5f8bcd5772 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/analyzecommit.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/analyzecommit.py @@ -32,6 +32,7 @@ import sqlalchemy as s from augur.application.db.lib import execute_sql, fetchall_data_from_sql_text +from augur.tasks.init import get_rabbitmq_conn_string def analyze_commit(logger, repo_id, repo_loc, commit): diff --git a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py index 40f3a29e0e..caae6c02ba 100644 --- a/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py +++ b/augur/tasks/git/util/facade_worker/facade_worker/utilitymethods.py @@ -26,7 +26,7 @@ # repos. 
It also rebuilds analysis data, checks any changed affiliations and # aliases, and caches data for display. import subprocess -from subprocess import check_output +from subprocess import check_output, CalledProcessError import os import sqlalchemy as s from augur.application.db.models import * @@ -154,7 +154,15 @@ def get_repo_commit_count(logger, facade_helper, repo_git): if count_branches(repo_loc) == 0: return 0 - check_commit_count_cmd = check_output(["git", "--git-dir", repo_loc, "rev-list", "--count", "HEAD"]) + try: + check_commit_count_cmd = check_output( + ["git", "--git-dir", repo_loc, "rev-list", "--count", "HEAD"], + stderr=subprocess.PIPE) + except CalledProcessError as e: + logger.error(f"Ran into {e}: {e.output} {e.stderr} \n With return code {e.returncode}") + raise e + + commit_count = int(check_commit_count_cmd) return commit_count diff --git a/augur/tasks/git/util/facade_worker/setup.py b/augur/tasks/git/util/facade_worker/setup.py index e2a1af8b75..ef65f223a2 100644 --- a/augur/tasks/git/util/facade_worker/setup.py +++ b/augur/tasks/git/util/facade_worker/setup.py @@ -27,7 +27,7 @@ def read(filename): 'Flask-Login==0.5.0', 'Flask-WTF==1.0.0', 'requests==2.32.0', - 'psycopg2-binary==2.9.3', + 'psycopg2-binary==2.9.9', 'click==8.0.3', 'XlsxWriter==1.3.7' ], diff --git a/augur/tasks/github/augur-notes.code-workspace b/augur/tasks/github/augur-notes.code-workspace new file mode 100644 index 0000000000..f99c46d484 --- /dev/null +++ b/augur/tasks/github/augur-notes.code-workspace @@ -0,0 +1,14 @@ +{ + "folders": [ + { + "path": "../../../../../augurlabs/augur-notes" + }, + { + "path": "../../.." 
+ }, + { + "path": "../../../../../sociallycompute/project2025" + } + ], + "settings": {} +} \ No newline at end of file diff --git a/augur/tasks/github/events.py b/augur/tasks/github/events.py index cf7df57582..654dc68c6a 100644 --- a/augur/tasks/github/events.py +++ b/augur/tasks/github/events.py @@ -78,7 +78,7 @@ def _insert_issue_events(self, events): bulk_insert_dicts(self._logger, events, IssueEvent, issue_event_natural_keys) def _insert_pr_events(self, events): - pr_event_natural_keys = ["node_id"] + pr_event_natural_keys = ["repo_id", "issue_event_src_id"] bulk_insert_dicts(self._logger, events, PullRequestEvent, pr_event_natural_keys) def _insert_contributors(self, contributors): @@ -281,14 +281,23 @@ def _collect_and_process_issue_events(self, owner, repo, repo_id, key_auth, sinc with engine.connect() as connection: - # TODO: Remove src id if it ends up not being needed - query = text(f""" - select issue_id as issue_id, gh_issue_number as issue_number, gh_issue_id as gh_src_id - from issues - where repo_id={repo_id} - and updated_at > timestamptz(timestamp '{since}') - order by created_at desc; - """) + if since: + # TODO: Remove src id if it ends up not being needed + query = text(f""" + select issue_id as issue_id, gh_issue_number as issue_number, gh_issue_id as gh_src_id + from issues + where repo_id={repo_id} + and updated_at > timestamptz(timestamp '{since}') + order by created_at desc; + """) + else: + # TODO: Remove src id if it ends up not being needed + query = text(f""" + select issue_id as issue_id, gh_issue_number as issue_number, gh_issue_id as gh_src_id + from issues + where repo_id={repo_id} + order by created_at desc; + """) issue_result = connection.execute(query).fetchall() @@ -335,13 +344,21 @@ def _collect_and_process_pr_events(self, owner, repo, repo_id, key_auth, since): with engine.connect() as connection: - query = text(f""" - select pull_request_id, pr_src_number as gh_pr_number, pr_src_id - from pull_requests - where 
repo_id={repo_id} - and pr_updated_at > timestamptz(timestamp '{since}') - order by pr_created_at desc; - """) + if since: + query = text(f""" + select pull_request_id, pr_src_number as gh_pr_number, pr_src_id + from pull_requests + where repo_id={repo_id} + and pr_updated_at > timestamptz(timestamp '{since}') + order by pr_created_at desc; + """) + else: + query = text(f""" + select pull_request_id, pr_src_number as gh_pr_number, pr_src_id + from pull_requests + where repo_id={repo_id} + order by pr_created_at desc; + """) pr_result = connection.execute(query).fetchall() diff --git a/augur/tasks/github/pull_requests/tasks.py b/augur/tasks/github/pull_requests/tasks.py index b65da7f4f3..21ef9d8eba 100644 --- a/augur/tasks/github/pull_requests/tasks.py +++ b/augur/tasks/github/pull_requests/tasks.py @@ -231,9 +231,14 @@ def collect_pull_request_review_comments(repo_git: str, full_collection: bool) - repo_id = get_repo_by_repo_git(repo_git).repo_id if not full_collection: - # subtract 2 days to ensure all data is collected - core_data_last_collected = (get_core_data_last_collected(repo_id) - timedelta(days=2)).replace(tzinfo=timezone.utc) - review_msg_url += f"?since={core_data_last_collected.isoformat()}" + last_collected_date = get_secondary_data_last_collected(repo_id) + + if last_collected_date: + # subtract 2 days to ensure all data is collected + core_data_last_collected = (last_collected_date - timedelta(days=2)).replace(tzinfo=timezone.utc) + review_msg_url += f"?since={core_data_last_collected.isoformat()}" + else: + logger.warning(f"core_data_last_collected is NULL for recollection on repo: {repo_git}") pr_reviews = get_pull_request_reviews_by_repo_id(repo_id) diff --git a/augur/tasks/github/util/github_api_key_handler.py b/augur/tasks/github/util/github_api_key_handler.py index 9ca777a1be..47933e67dc 100644 --- a/augur/tasks/github/util/github_api_key_handler.py +++ b/augur/tasks/github/util/github_api_key_handler.py @@ -107,6 +107,8 @@ def 
get_api_keys(self) -> List[str]: if len(keys) == 0: return [] + + keys = [key.strip() for key in keys] valid_keys = [] with httpx.Client() as client: diff --git a/augur/tasks/github/util/github_data_access.py b/augur/tasks/github/util/github_data_access.py index a648f990c6..0eda1c8462 100644 --- a/augur/tasks/github/util/github_data_access.py +++ b/augur/tasks/github/util/github_data_access.py @@ -5,6 +5,8 @@ from urllib.parse import urlparse, parse_qs, urlencode from keyman.KeyClient import KeyClient +GITHUB_RATELIMIT_REMAINING_CAP = 50 + class RatelimitException(Exception): @@ -110,6 +112,13 @@ def make_request(self, url, method="GET", timeout=100): response.raise_for_status() + try: + if "X-RateLimit-Remaining" in response.headers and int(response.headers["X-RateLimit-Remaining"]) < GITHUB_RATELIMIT_REMAINING_CAP: + raise RatelimitException(response) + except ValueError: + self.logger.warning(f"X-RateLimit-Remaining was not an integer. Value: {response.headers['X-RateLimit-Remaining']}") + + return response def make_request_with_retries(self, url, method="GET", timeout=100): @@ -148,7 +157,7 @@ def __handle_github_ratelimit_response(self, response): f'\n\n\n\nSleeping for {retry_after} seconds due to secondary rate limit issue.\n\n\n\n') time.sleep(retry_after) - elif "X-RateLimit-Remaining" in headers and int(headers["X-RateLimit-Remaining"]) == 0: + elif "X-RateLimit-Remaining" in headers and int(headers["X-RateLimit-Remaining"]) < GITHUB_RATELIMIT_REMAINING_CAP: current_epoch = int(time.time()) epoch_when_key_resets = int(headers["X-RateLimit-Reset"]) key_reset_time = epoch_when_key_resets - current_epoch diff --git a/augur/tasks/gitlab/gitlab_api_key_handler.py b/augur/tasks/gitlab/gitlab_api_key_handler.py index 03c0cf66c3..72b0ace148 100644 --- a/augur/tasks/gitlab/gitlab_api_key_handler.py +++ b/augur/tasks/gitlab/gitlab_api_key_handler.py @@ -110,6 +110,8 @@ def get_api_keys(self) -> List[str]: if len(keys) == 0: return [] + keys = [key.strip() for key 
in keys] + valid_keys = [] with httpx.Client() as client: diff --git a/augur/tasks/init/celery_app.py b/augur/tasks/init/celery_app.py index 1be45b1f00..5026244f81 100644 --- a/augur/tasks/init/celery_app.py +++ b/augur/tasks/init/celery_app.py @@ -201,8 +201,8 @@ def setup_periodic_tasks(sender, **kwargs): The tasks so that they are grouped by the module they are defined in """ from celery.schedules import crontab - from augur.tasks.start_tasks import augur_collection_monitor, augur_collection_update_weights - from augur.tasks.start_tasks import non_repo_domain_tasks, retry_errored_repos + from augur.tasks.start_tasks import augur_collection_monitor + from augur.tasks.start_tasks import non_repo_domain_tasks, retry_errored_repos, create_collection_status_records from augur.tasks.git.facade_tasks import clone_repos from augur.tasks.db.refresh_materialized_views import refresh_materialized_views from augur.tasks.data_analysis.contributor_breadth_worker.contributor_breadth_worker import contributor_breadth_model @@ -232,6 +232,9 @@ def setup_periodic_tasks(sender, **kwargs): logger.info(f"Setting 404 repos to be marked for retry on midnight each day") sender.add_periodic_task(crontab(hour=0, minute=0),retry_errored_repos.s()) + one_day_in_seconds = 24*60*60 + sender.add_periodic_task(one_day_in_seconds, create_collection_status_records.s()) + @after_setup_logger.connect def setup_loggers(*args,**kwargs): """Override Celery loggers with our own.""" diff --git a/augur/tasks/start_tasks.py b/augur/tasks/start_tasks.py index 3ba30ed707..2a697a0ea1 100644 --- a/augur/tasks/start_tasks.py +++ b/augur/tasks/start_tasks.py @@ -378,5 +378,5 @@ def create_collection_status_records(self): CollectionStatus.insert(session, logger, repo[0]) repo = execute_sql(query).first() - #Check for new repos every seven minutes to be out of step with the clone_repos task - create_collection_status_records.si().apply_async(countdown=60*7) + # no longer recursively run this task because 
collection status records are added when repos are inserted + #create_collection_status_records.si().apply_async(countdown=60*7) diff --git a/augur/util/repo_load_controller.py b/augur/util/repo_load_controller.py index 7021a215fb..af46ce3260 100644 --- a/augur/util/repo_load_controller.py +++ b/augur/util/repo_load_controller.py @@ -5,7 +5,7 @@ from typing import Any, Dict from augur.application.db.engine import DatabaseEngine -from augur.application.db.models import Repo, UserRepo, RepoGroup, UserGroup, User +from augur.application.db.models import Repo, UserRepo, RepoGroup, UserGroup, User, CollectionStatus from augur.application.db.models.augur_operations import retrieve_owner_repos from augur.application.db.util import execute_session_query @@ -67,8 +67,10 @@ def add_cli_repo(self, repo_data: Dict[str, Any], from_org_list=False, repo_type # if the repo doesn't exist it adds it if "gitlab" in url: repo_id = Repo.insert_gitlab_repo(self.session, url, repo_group_id, "CLI") + CollectionStatus.insert(self.session, logger, repo_id) else: repo_id = Repo.insert_github_repo(self.session, url, repo_group_id, "CLI", repo_type) + CollectionStatus.insert(self.session, logger, repo_id) if not repo_id: logger.warning(f"Invalid repo group id specified for {url}, skipping.") diff --git a/docker-compose.yml b/docker-compose.yml index e077260ed7..81452bd09d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,14 +10,16 @@ services: - "POSTGRES_PASSWORD=${AUGUR_DB_PASSWORD:-augur}" - "PGDATA=/var/lib/postgresql/data/pgdata" ports: - - "127.0.0.1:${AUGUR_DB_PORT:-5432}:5432" + - "${AUGUR_DB_PORT:-5432}:5432" volumes: - augurpostgres:/var/lib/postgresql/data + networks: + - augur redis: image: "redis:alpine" - ports: - - 6379:6379 + networks: + - augur rabbitmq: image: augur-rabbitmq @@ -28,12 +30,8 @@ services: - RABBIT_MQ_DEFAULT_USER=${AUGUR_RABBITMQ_USERNAME:-augur} - RABBIT_MQ_DEFAULT_PASSWORD=${AUGUR_RABBITMQ_PASSWORD:-password123} - 
RABBIT_MQ_DEFAULT_VHOST=${AUGUR_RABBITMQ_VHOST:-augur_vhost} - # ports for amqp connections / management api - ports: - - 5671:5671 - - 5672:5672 - - 15671:15671 - - 15672:15672 + networks: + - augur augur: image: augur-new:latest @@ -45,8 +43,8 @@ services: restart: unless-stopped ports: - 5002:5000 - extra_hosts: - - "host.docker.internal:host-gateway" #Be able to ping services on the local machine + #extra_hosts: + # - "host.docker.internal:host-gateway" #Be able to ping services on the local machine environment: - "AUGUR_DB=postgresql+psycopg2://${AUGUR_DB_USER:-augur}:${AUGUR_DB_PASSWORD:-augur}@augur-db:5432/augur" - "AUGUR_DB_SCHEMA_BUILD=1" @@ -61,6 +59,30 @@ services: - augur-db - redis - rabbitmq + networks: + - augur + + # Flower is a UI that helps more easily monitor running tasks for celery workers. + # This wont show you every error from the entire augur system, but it should show most of them. + # uncomment the section below to use flower + #flower: + # image: augur-new:latest + # restart: unless-stopped + # command: + # [ "celery", "-A", "augur.tasks.init.celery_app.celery_app", "flower", "--max-tasks=1000000" ] + # ports: + # - 5555:5555 + # environment: + # - "AUGUR_DB=postgresql+psycopg2://${AUGUR_DB_USER:-augur}:${AUGUR_DB_PASSWORD:-augur}@augur-db:5432/augur" + # - REDIS_CONN_STRING=redis://redis:6379 + # - RABBITMQ_CONN_STRING=amqp://${AUGUR_RABBITMQ_USERNAME:-augur}:${AUGUR_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${AUGUR_RABBITMQ_VHOST:-augur_vhost} + # depends_on: + # - augur + # - augur-db + # - redis + # - rabbitmq + # networks: + # - augur volumes: facade: @@ -68,4 +90,5 @@ volumes: augurpostgres: driver: local - +networks: + augur: \ No newline at end of file diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 220bb8ff49..959d3952f5 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -1,8 +1,38 @@ # SPDX-License-Identifier: MIT + + +FROM golang:1.23 AS golang + + +# RUN 
./scripts/docker/install-go.sh +ENV PATH="${PATH}:/usr/local/go/bin" +COPY ./scripts/ scripts/ +RUN ./scripts/docker/install-golang-deps.sh + + +# FROM rust:1.78 as rust + +# # Ensure Rust directories are writable +# RUN mkdir -p /root/.rustup/downloads /root/.cargo/registry && \ +# chmod -R 777 /root/.rustup /root/.cargo + +# # Add rust and cargo to PATH +# # ENV PATH="/root/.cargo/bin:${PATH}" + +# # Install the specific version of Rust +# # RUN set -x \ +# # && rustup install 1.78.0 +# # RUN set -x \ +# # && rustup default 1.78.0 + + +# # Add rust and cargo to PATH +# ENV PATH="/usr/bin/:/root/.cargo/bin:/usr/local/bin:${PATH}" + FROM python:3.11-slim-bullseye LABEL maintainer="outdoors@acm.org" -LABEL version="0.76.6" +LABEL version="0.86.0" ENV DEBIAN_FRONTEND=noninteractive ENV PATH="/usr/bin/:/usr/local/bin:/usr/lib:${PATH}" @@ -24,15 +54,12 @@ RUN set -x \ postgresql-client \ libpq-dev \ build-essential \ - rustc \ - cargo \ chromium \ tar \ jq \ chromium-driver \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* \ - && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + && rm -rf /var/lib/apt/lists/* # Install Firefox from Debian repositories for ARM64 architecture RUN set -x \ @@ -58,18 +85,9 @@ RUN GECKODRIVER_VERSION=$(curl -s https://api.github.com/repos/mozilla/geckodriv RUN firefox --version RUN geckodriver --version -# Ensure Rust directories are writable -RUN mkdir -p /root/.rustup/downloads /root/.cargo/registry && \ - chmod -R 777 /root/.rustup /root/.cargo -# Add rust and cargo to PATH -ENV PATH="/root/.cargo/bin:${PATH}" -# Install the specific version of Rust -RUN set -x \ - && rustup install 1.78.0 -RUN set -x \ - && rustup default 1.78.0 + EXPOSE 5000 @@ -80,9 +98,9 @@ COPY ./augur/ augur/ COPY ./metadata.py . COPY ./setup.py . 
COPY ./scripts/ scripts/ +COPY ./keyman/ keyman/ + -# Add rust and cargo to PATH -ENV PATH="/usr/bin/:/root/.cargo/bin:/usr/local/bin:${PATH}" RUN python3 -m venv /opt/venv @@ -100,10 +118,11 @@ RUN set -x \ && /opt/venv/bin/pip install wheel \ && /opt/venv/bin/pip install . -RUN ./scripts/docker/install-go.sh -ENV PATH="${PATH}:/usr/local/go/bin" RUN ./scripts/docker/install-workers-deps.sh +COPY --from=golang "/root/scc" "/root/scc" +COPY --from=golang "/root/scorecard/scorecard" "/root/scorecard" + # RUN ./scripts/install/workers.sh RUN mkdir -p repos/ logs/ /augur/facade/ diff --git a/docker/backend/graphical b/docker/backend/graphical index a1c6b95d39..b5c223befe 100644 --- a/docker/backend/graphical +++ b/docker/backend/graphical @@ -1,22 +1,75 @@ -#SPDX-License-Identifier: MIT -FROM python:3.9-slim-bullseye +# SPDX-License-Identifier: MIT +FROM python:3.11-slim-bullseye LABEL maintainer="outdoors@acm.org" -LABEL version="0.51.1" +LABEL version="0.76.6" ENV DEBIAN_FRONTEND=noninteractive +ENV PATH="/usr/bin/:/usr/local/bin:/usr/lib:${PATH}" RUN set -x \ && apt-get update \ - && apt-get -y install --no-install-recommends \ + && apt-get -y install \ git \ bash \ curl \ gcc \ - python3-pip \ + software-properties-common \ + postgresql-contrib \ + musl-dev \ + python3-dev \ + python3-distutils \ + python3-venv \ wget \ postgresql-client \ - && rm -rf /var/lib/apt/lists/* + libpq-dev \ + build-essential \ + rustc \ + cargo \ + chromium \ + tar \ + jq \ + chromium-driver \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* \ + && curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Install Firefox from Debian repositories for ARM64 architecture +RUN set -x \ + && apt-get update \ + && apt-get install -y firefox-esr + +# Install Geckodriver +RUN GECKODRIVER_VERSION=$(curl -s https://api.github.com/repos/mozilla/geckodriver/releases/latest | jq -r '.tag_name' | sed 's/v//') \ + && ARCH=$(uname -m) \ + && if [ "$ARCH" = "aarch64" ]; then \ + 
GECKODRIVER_URL="https://github.com/mozilla/geckodriver/releases/download/v${GECKODRIVER_VERSION}/geckodriver-v${GECKODRIVER_VERSION}-linux-aarch64.tar.gz"; \ + GECKODRIVER_FILE="geckodriver-v${GECKODRIVER_VERSION}-linux-aarch64.tar.gz"; \ + else \ + GECKODRIVER_URL="https://github.com/mozilla/geckodriver/releases/download/v${GECKODRIVER_VERSION}/geckodriver-v${GECKODRIVER_VERSION}-linux64.tar.gz"; \ + GECKODRIVER_FILE="geckodriver-v${GECKODRIVER_VERSION}-linux64.tar.gz"; \ + fi \ + && wget $GECKODRIVER_URL \ + && tar -xzf $GECKODRIVER_FILE \ + && mv geckodriver /usr/local/bin/ \ + && rm $GECKODRIVER_FILE + +# Verify installations +RUN firefox --version +RUN geckodriver --version + +# Ensure Rust directories are writable +RUN mkdir -p /root/.rustup/downloads /root/.cargo/registry && \ + chmod -R 777 /root/.rustup /root/.cargo + +# Add rust and cargo to PATH +ENV PATH="/root/.cargo/bin:${PATH}" + +# Install the specific version of Rust +RUN set -x \ + && rustup install 1.78.0 +RUN set -x \ + && rustup default 1.78.0 EXPOSE 5000 @@ -27,20 +80,38 @@ COPY ./augur/ augur/ COPY ./metadata.py . COPY ./setup.py . COPY ./scripts/ scripts/ +COPY ./keyman/ keyman/ + +# Add rust and cargo to PATH +ENV PATH="/usr/bin/:/root/.cargo/bin:/usr/local/bin:${PATH}" -#COPY ./docker/backend/docker.config.json . RUN python3 -m venv /opt/venv +RUN set -x \ + && /opt/venv/bin/pip install --upgrade pip + +RUN set -x \ + && /opt/venv/bin/pip install wheel + RUN set -x \ && /opt/venv/bin/pip install . -RUN ./scripts/docker/install-workers-deps.sh +RUN set -x \ + && /opt/venv/bin/pip install --upgrade pip \ + && /opt/venv/bin/pip install wheel \ + && /opt/venv/bin/pip install . 
RUN ./scripts/docker/install-go.sh +ENV PATH="${PATH}:/usr/local/go/bin" +RUN ./scripts/docker/install-workers-deps.sh + # RUN ./scripts/install/workers.sh RUN mkdir -p repos/ logs/ /augur/facade/ COPY ./docker/backend/graphical.sh / -RUN chmod +x /graphical.sh -ENTRYPOINT /graphical.sh +COPY ./docker/backend/init.sh / +RUN chmod +x /entrypoint.sh /init.sh +ENTRYPOINT ["/bin/bash", "/graphical.sh"] +#ENTRYPOINT ["/entrypoint.sh"] +CMD /init.sh diff --git a/docker/database/Dockerfile b/docker/database/Dockerfile index c1d1098875..e5825ca7bf 100644 --- a/docker/database/Dockerfile +++ b/docker/database/Dockerfile @@ -2,7 +2,7 @@ FROM postgres:16 LABEL maintainer="outdoors@acm.org" -LABEL version="0.76.6" +LABEL version="0.86.0" ENV POSTGRES_DB "test" ENV POSTGRES_USER "augur" diff --git a/docker/rabbitmq/Dockerfile b/docker/rabbitmq/Dockerfile index d0ea2dea4a..9b0b3c98c3 100644 --- a/docker/rabbitmq/Dockerfile +++ b/docker/rabbitmq/Dockerfile @@ -1,7 +1,7 @@ FROM rabbitmq:3.12-management-alpine LABEL maintainer="574/augur@simplelogin.com" -LABEL version="0.76.6" +LABEL version="0.86.0" ARG RABBIT_MQ_DEFAULT_USER=augur ARG RABBIT_MQ_DEFAULT_PASSWORD=password123 diff --git a/docs/dev-osx-install.md b/docs/dev-osx-install.md index c70bac7714..aa5b67b7fa 100644 --- a/docs/dev-osx-install.md +++ b/docs/dev-osx-install.md @@ -31,7 +31,7 @@ export PKG_CONFIG_PATH="/opt/homebrew/opt/openblas/lib/pkgconfig" ## Git Platform Requirements (Things to have setup prior to initiating installation.) 1. Obtain a GitHub Access Token: https://github.com/settings/tokens -2. Obtain a GitLab Access Token: https://gitlab.com/-/profile/personal_access_tokens +2. Obtain a GitLab Access Token: https://gitlab.com/-/user_settings/personal_access_tokens ### Fork and Clone Augur 1. 
Fork https://github.com/chaoss/augur diff --git a/docs/new-install-ubuntu-python-3.10.md b/docs/new-install-ubuntu-python-3.10.md index 3cd7a5a38a..6fa2ee78fc 100644 --- a/docs/new-install-ubuntu-python-3.10.md +++ b/docs/new-install-ubuntu-python-3.10.md @@ -5,7 +5,7 @@ We default to this version of Ubuntu for the moment because Augur does not yet s ## Git Platform Requirements (Things to have setup prior to initiating installation.) 1. Obtain a GitHub Access Token: https://github.com/settings/tokens -2. Obtain a GitLab Access Token: https://gitlab.com/-/profile/personal_access_tokens +2. Obtain a GitLab Access Token: https://gitlab.com/-/user_settings/personal_access_tokens ### Fork and Clone Augur 1. Fork https://github.com/chaoss/augur diff --git a/docs/new-install.md b/docs/new-install.md index 9ec8960857..017cb41322 100644 --- a/docs/new-install.md +++ b/docs/new-install.md @@ -5,7 +5,7 @@ We default to this version of Ubuntu for the moment because Augur does not yet s ## Git Platform Requirements (Things to have setup prior to initiating installation.) 1. Obtain a GitHub Access Token: https://github.com/settings/tokens -2. Obtain a GitLab Access Token: https://gitlab.com/-/profile/personal_access_tokens +2. Obtain a GitLab Access Token: https://gitlab.com/-/user_settings/personal_access_tokens ### Fork and Clone Augur 1. Fork https://github.com/chaoss/augur diff --git a/docs/source/getting-started/dev-osx-install.rst b/docs/source/getting-started/dev-osx-install.rst index 05285da207..05d16c29be 100644 --- a/docs/source/getting-started/dev-osx-install.rst +++ b/docs/source/getting-started/dev-osx-install.rst @@ -47,8 +47,7 @@ Git Platform Requirements (Things to have setup prior to initiating installation ---------------------------------------------------------------------------------- 1. Obtain a GitHub Access Token: https://github.com/settings/tokens -2. Obtain a GitLab Access Token: - https://gitlab.com/-/profile/personal_access_tokens +2. 
Obtain a GitLab Access Token: https://gitlab.com/-/user_settings/personal_access_tokens Fork and Clone Augur ~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst index 5c2e2f62f7..4e367958c1 100644 --- a/docs/source/getting-started/installation.rst +++ b/docs/source/getting-started/installation.rst @@ -216,11 +216,6 @@ your installation of Python 3: on most systems, this is ``python3``, but yours m # run the install script $ make install -.. code-block:: bash - - # If you want to develop with Augur, use this command instead - $ make install-dev - If you think something went wrong, check the log files in ``logs/``. If you want to try again, you can use ``make clean`` to delete any build files before running ``make install`` again. MacOS users: diff --git a/docs/source/getting-started/new-install.rst b/docs/source/getting-started/new-install.rst index 3fd45f4a97..b10aa0b41e 100644 --- a/docs/source/getting-started/new-install.rst +++ b/docs/source/getting-started/new-install.rst @@ -9,8 +9,7 @@ Git Platform Requirements (Things to have setup prior to initiating installation ---------------------------------------------------------------------------------- 1. Obtain a GitHub Access Token: https://github.com/settings/tokens -2. Obtain a GitLab Access Token: - https://gitlab.com/-/profile/personal_access_tokens +2. Obtain a GitLab Access Token: https://gitlab.com/-/user_settings/personal_access_tokens Fork and Clone Augur ~~~~~~~~~~~~~~~~~~~~ diff --git a/gsoc-ideas.md b/gsoc-ideas.md index 1cf74e11e6..3bc2748811 100644 --- a/gsoc-ideas.md +++ b/gsoc-ideas.md @@ -5,6 +5,34 @@ [Micro-tasks and place for questions](https://github.com/chaoss/augur/issues/1640) +## Microtasks + +For becoming familiar with Augur, you can start by reading some documentation. You can find useful information at in the links, below. 
Grimoirelab also has a set of installation instructions and documentation here: https://chaoss.github.io/grimoirelab-tutorial/ + +#### GSoC Students : +Once you're familiar with Augur, you can have a look at the following microtasks. + +Microtask 0: + Download and configure Augur, creating a dev environment using the general cautions noted here: +Augur + https://oss-augur.readthedocs.io/en/dev/getting-started/installation.html and the full documentation here: + https://oss-augur.readthedocs.io/en/dev/development-guide/toc.html +Grimoirelab + https://chaoss.github.io/grimoirelab-tutorial/ + +Microtask 1: + Work on any Augur or Grimoirelab Issue that's Open + +Microtask 2: + Identify new issues you encounter during installation. + +Microtask 3: + Explore data presently captured, develop an experimental visualization using tools of your choice. If Jupyter Notebooks against an Augur database/API endpoint collection, use https://github.com/chaoss/augur-community-reports for development. + +Microtask 4: + Anything you want to show us. Even if you find bugs in our documentation and want to issue a PR for those! + + This project will add GenSIM logic, and other capabilities to the Clustering Worker inside of Augur Software, and be extended into a generalized Open Source Software Conversational Topic Modeling Instrument. CHOASS/augur has several workers that store machine learning information derived from computational linguistic analysis of data in the `message` table. The message table includes messages from issue, pull request, pull request review, and email messages. They are related to their origin with bridge tables like `pull_request_message_ref`. The ML/CL workers are all run against all the messages, regardless of origin. @@ -50,7 +78,122 @@ The aims of the project are as follows: - (Stretch Goal) Improve the operation of the overall machine learning insights pipeline in CHAOSS/augur, and generalize these capabilities. 
+ +## IDEA: Implement Conversion Rate Metric in CHAOSS Software + +**Hours: 350** + +[Micro-tasks and place for questions](https://github.com/chaoss/augur/issues/2992) + +### Conversion Rate + +Question: What are the rates at which new contributors become more sustained contributors? + +### Description + +The conversion rate metric is primarily aimed at identifying how new community members become more sustained contributors over time. However, the conversion rate metric can also help understand the changing roles of contributors, how a community is growing or declining, and paths to maintainership within an open source community. + +### Objectives (why) + - Observe if new members are becoming more involved with an open source project + - Observe if new members are taking on leadership roles within an open source project + - Observe if outreach efforts are generating new contributors to an open source project + - Observe if outreach efforts are impacting roles of existing community members + - Observe if community conflict results in changing roles within an open source community + - Identify casual, regular, and core contributors + +### Implementation + +This project could be implemented using either the CHAOSS/Augur, or CHAOSS/Grimoirelab (including stack components noted in references) technology stacks. + +The aims of the project are as follows: + - Implement the Conversion Rate Metric in CHAOSS Software + - After discussion, consider which CHAOSS Software Stack you wish to work with + - In collaboration with mentors, define the technology framework, and initial path to a "hello world" version of the metric + - Iterative development of the metric + - Assist in the deployment of this metric for a pre-determined collection of repositories in a publicly viewable website linked to the CHAOSS project. + - Advance the work of the [chaoss metrics models working group](https://github.com/chaoss/wg-metrics-models). 
+ +* _Difficulty:_ Medium +* _Requirements:_ Knowledge of Python is desired. Some knowledge of Javascript or twitter/bootstrap is also desired. Key requirement is a keenness to dig into this challenge! +* _Recommended:_ Python experience. +* _Mentors:_ Sean Goggins + +#### Filters (optional) + - Commits + - Issue creation + - Issue comments + - Change request creation + - Change request comments + - Merged change requests + - Code Reviews + - Code Review Comments + - Reactions (emoji) + - Chat platform messages + - Maillist messages + - Meetup attendance + +#### Visualizations + +![](./images/gsoc-1.png) + +Source: https://chaoss.github.io/grimoirelab-sigils/assets/images/screenshots/sigils/overall-community-structure.png + +![](./images/gsoc-2.png) + +Source: https://opensource.com/sites/default/files/uploads/2021-09-15-developer-level-02.png + +#### Tools Providing the Metric + - Augur + - openEuler Infra + +#### Data Collection Strategies + +The following is an example from the [openEuler](https://www.openeuler.org/en/) community: + - A group of people who attended an offline event A held by the community, can be identified as Group A. Demographic information of Group A could be fetched from an on-line survey when people register for the event. To identify the conversion rate of these participants: + - Some people from Group A started watching and forking the repos, indicating they have shown some interest in this community. We marked them as subgroup D0 (Developer Level 0) as a subset of Group A. + - Conversion rate from the total number of people in Group A to the number of people in subgroup D0 is: D0/Group A + - Some people from subgroup D0 make more contributions beyond just watching or forking, including creating issues, making comments on an issue, or performing a code review. We marked them as subgroup D1 (Developer Level 1) as a subset of D0. 
+ - Conversion rate from the total number of people in Subgroup D0 to the number of people in subgroup D1 is: D1/D0. + - Some people from subgroup D1 continue to make more contributions, like code contributions, to the project. This could include creating merge requests and merging new project code. We marked them as subgroup D2 (Developer Level 2) as a subset of D1. + - Conversion rate from the total number of people in subgroup D1 to the number of people in subgroup D2 is: D2/D1. + +![](./images/gsoc-3.png) + + Definition: + - Developer Level 0 (D0) example: Contributors who have given the project a star, or are watching or have forked the repository + - Developer Level 1 (D1): Contributors who have created issues, made comments on an issue, or performed a code review + - Developer Level 2 (D2): Contributors who have created a merge request and successfully merged code + - Conversion Rate (Group A -> D0): CR (Group A -> D0) = D0/Group A + - Conversion Rate (D0 -> D1): CR (D0 -> D1) = D1/D0 + - Conversion Rate (D1 -> D2): CR (D1 -> D2) = D2/D1 + +### References + - https://opensource.com/article/21/11/data-open-source-contributors + - https://github.com/chaoss/augur + - https://gitee.com/openeuler/website-v2/blob/master/web-ui/docs/en/blog/zhongjun/2021-09-15-developer-level.md + - https://chaoss.github.io/grimoirelab-sigils/common/onion_analysis/ + - https://mikemcquaid.com/2018/08/14/the-open-source-contributor-funnel-why-people-dont-contribute-to-your-open-source-project/ +### Contributors + - Sean Goggins + - Andrew Brain + - John McGinness + +## IDEA: Open Source Software Health Metrics Visualization Exploration + +**Hours: 350** + +[Micro-tasks and place for questions](https://github.com/chaoss/augur/issues/2993) + +The CHAOSS Community currently delivers pre-packaged visualizations of open source software health data through Augur APIs (https://github.com/chaoss/augur/blob/main/augur/routes/pull_request_reports.py and 
https://github.com/chaoss/augur/blob/main/augur/routes/contributor_reports.py), and the https://github.com/chaoss/augur-community-reports repository. This project seeks to expand, refine, and standardize the visualization of different classes of community health metrics data. Specifically, some analyses are temporal, others are anomaly driven, and in some cases contrasts across repositories and communities are required. In each case, the visualization of data is an essential component for metrics, and what we are now referring to as metrics models (https://github.com/chaoss/wg-metrics-models). + +Additional resources include: http://new.augurlabs.io/ && https://github.com/augurlabs/augur_view which demonstrate the updated twitter/bootstrap Augur frontend. + +The aims of the project are as follows: + - Experiment with standard metrics visualizations using direct Augur database connections, or through the Augur API. + - Refine metrics, and metrics model visualizations using Jupyter Notebooks or similar technology. + - Transform visualizations, as they are completed, into Augur API endpoints, following the pull request, and contributor reports examples. + * _Difficulty:_ Medium -* _Requirements:_ Interest in software analytics. Python programming. Conceptual understanding of machine learning, and an eagerness to learn maching learning, and SQL knowledge. -* _Recommended:_ Experience with Python -* _Mentors:_ Sean Goggins, Andrew Brain, Isaac Milarsky +* _Requirements:_ Strong interest in data visualization. +* _Recommended:_ Experience with Python is desirable, and experience designing, or developing visualizations is desirable. 
+* _Mentors:_ Isaac Milarsky, Andrew Brain diff --git a/gsoc-interest.md b/gsoc-interest.md index 04a237581d..e737687a01 100644 --- a/gsoc-interest.md +++ b/gsoc-interest.md @@ -1,5 +1,7 @@ # Google Summer of Code 2025 Interested Candidates +[IDEAS PAGE](https://github.com/chaoss/augur/blob/main/gsoc-ideas.md) + Hi potential GSoC students, You can ask questions and meet the community on Slack here: https://join.slack.com/t/chaoss-workspace/shared_invite/zt-289zxh6tu-3oQaFlutPFY039MjKpnWcA ... look for the `wg-augur-8knot` channel. diff --git a/keyman/__init__.py b/keyman/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/metadata.py b/metadata.py index 2ac76c87d3..740db52f3d 100644 --- a/metadata.py +++ b/metadata.py @@ -5,8 +5,8 @@ __short_description__ = "Python 3 package for free/libre and open-source software community metrics, models & data collection" -__version__ = "0.81.0" -__release__ = "v0.81.0 (Super Soaker)" +__version__ = "0.86.0" +__release__ = "v0.86.0 (Pod People)" __license__ = "MIT" -__copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Sean Goggins, Brian Warner & Augurlabs 2112" +__copyright__ = "University of Missouri, University of Nebraska-Omaha, CHAOSS, Sean Goggins, Brian Warner & Augurlabs 2025" diff --git a/podman-compose.yml b/podman-compose.yml deleted file mode 100644 index 9970f81325..0000000000 --- a/podman-compose.yml +++ /dev/null @@ -1,68 +0,0 @@ -#SPDX-License-Identifier: MIT -version: '3' -services: - augur-db: - image: postgres:14 - restart: unless-stopped - environment: - - "POSTGRES_DB=augur" - - "POSTGRES_USER=${AUGUR_DB_USER:-augur}" - - "POSTGRES_PASSWORD=${AUGUR_DB_PASSWORD:-augur}" - - "PGDATA=/var/lib/postgresql/data/pgdata" - ports: - - "${AUGUR_DB_PORT:-5432}:5432" - volumes: - - augurpostgres:/var/lib/postgresql/data - - redis: - image: "redis:alpine" - ports: - - 6379:6379 - - rabbitmq: - image: augur-rabbitmq - build: - context: . 
- dockerfile: ./docker/rabbitmq/Dockerfile - args: - - RABBIT_MQ_DEFAULT_USER=${AUGUR_RABBITMQ_USERNAME:-augur} - - RABBIT_MQ_DEFAULT_PASSWORD=${AUGUR_RABBITMQ_PASSWORD:-password123} - - RABBIT_MQ_DEFAULT_VHOST=${AUGUR_RABBITMQ_VHOST:-augur_vhost} - # ports for amqp connections / management api - ports: - - 5671:5671 - - 5672:5672 - - 15671:15671 - - 15672:15672 - - augur: - image: augur-new:latest - build: - context: . - dockerfile: ./docker/backend/Dockerfile - volumes: - - facade:/augur/facade - restart: unless-stopped - ports: - - 5002:5000 - environment: - - "AUGUR_DB=postgresql+psycopg2://${AUGUR_DB_USER:-augur}:${AUGUR_DB_PASSWORD:-augur}@augur-db:5432/augur" - - "AUGUR_DB_SCHEMA_BUILD=1" - - "AUGUR_GITHUB_API_KEY=${AUGUR_GITHUB_API_KEY}" - - "AUGUR_GITLAB_API_KEY=${AUGUR_GITLAB_API_KEY}" - - "AUGUR_GITHUB_USERNAME=${AUGUR_GITHUB_USERNAME}" - - "AUGUR_GITLAB_USERNAME=${AUGUR_GITLAB_USERNAME}" - - REDIS_CONN_STRING=redis://redis:6379 - - RABBITMQ_CONN_STRING=amqp://${AUGUR_RABBITMQ_USERNAME:-augur}:${AUGUR_RABBITMQ_PASSWORD:-password123}@rabbitmq:5672/${AUGUR_RABBITMQ_VHOST:-augur_vhost} - depends_on: - - augur-db - - redis - - rabbitmq - -volumes: - facade: - driver: local - augurpostgres: - driver: local - - diff --git a/scripts/docker/install-golang-deps.sh b/scripts/docker/install-golang-deps.sh new file mode 100755 index 0000000000..7d255b6b2c --- /dev/null +++ b/scripts/docker/install-golang-deps.sh @@ -0,0 +1,29 @@ +#!/bin/bash +set -x + +# Note this +CURRENT_DIR=$PWD; +# 18GB +# Install scc +SCC_DIR="$HOME/scc" +echo "Cloning Sloc Cloc and Code (SCC) to generate value data ..." +# this needs to be done from source. 
the latest version doesnt seem to exist on the package repo +# however, the latest version (v3.5.0) requires bumping the golang version in the Dockerfile +git clone --depth 1 --branch v3.4.0 https://github.com/boyter/scc "$SCC_DIR" +cd $SCC_DIR +go build; +echo "scc build done" +cd $CURRENT_DIR +# 18GB + +# Install scorecard +SCORECARD_DIR="$HOME/scorecard" +echo "Cloning OSSF Scorecard to generate scorecard data ..." +# lock version to prevent future issues if the golang version is bumped +git clone --depth 1 --branch v5.1.1 https://github.com/ossf/scorecard $SCORECARD_DIR +cd $SCORECARD_DIR +go build; +echo "scorecard build done" +cd $CURRENT_DIR + +# 16GB \ No newline at end of file diff --git a/scripts/docker/install-workers-deps.sh b/scripts/docker/install-workers-deps.sh index 188ec43622..c69e179486 100755 --- a/scripts/docker/install-workers-deps.sh +++ b/scripts/docker/install-workers-deps.sh @@ -8,30 +8,3 @@ do /opt/venv/bin/pip install . cd $OLD done - -# install nltk -# taken from ./scripts/install/nltk_dictionaries.sh -for i in stopwords punkt popular universal_tagset ; do - /opt/venv/bin/python -m nltk.downloader $i -done - -# Note this -CURRENT_DIR=$PWD; - -# Install scc -SCC_DIR="$HOME/scc" -echo "Cloning Sloc Cloc and Code (SCC) to generate value data ..." -git clone https://github.com/boyter/scc "$SCC_DIR" -cd $SCC_DIR -go build; -echo "scc build done" -cd $CURRENT_DIR - -# Install scorecard -SCORECARD_DIR="$HOME/scorecard" -echo "Cloning OSSF Scorecard to generate scorecard data ..." 
-git clone https://github.com/ossf/scorecard $SCORECARD_DIR -cd $SCORECARD_DIR -go build; -echo "scorecard build done" -cd $CURRENT_DIR diff --git a/setup.py b/setup.py index 8591f483e2..a356fef858 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ "pandas==1.5.3", # 1.4.3 "numpy==1.26.0", # 1.23.2 "requests==2.32.0", # 2.28.1 - "psycopg2-binary==2.9.3", #2.9.3 what is pscopg-binary 3.0.16 + "psycopg2-binary==2.9.9", #2.9.3 what is pscopg-binary 3.0.16 "click==8.0.3", # 8.1.3 "psutil==5.8.0", # 5.9.1 "gunicorn==22.0.0", # 20.1.0 diff --git a/test.md b/test.md index 9daeafb986..753e9975dd 100644 --- a/test.md +++ b/test.md @@ -1 +1 @@ -test +teh test