From b4c6595f7b8e807311a46c2d4854fb63b7792d04 Mon Sep 17 00:00:00 2001 From: Ulincsys Date: Tue, 29 Apr 2025 18:32:08 -0500 Subject: [PATCH 1/6] Fix facade task failure - Rollback transaction on failure - Automatically substitute tz for UTC when invalid - Do not reset date on invalid tz Signed-off-by: Ulincsys --- augur/application/db/lib.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/augur/application/db/lib.py b/augur/application/db/lib.py index d6c550f30b..3e22b18be4 100644 --- a/augur/application/db/lib.py +++ b/augur/application/db/lib.py @@ -1,3 +1,4 @@ +import re import time import random import logging @@ -243,6 +244,7 @@ def facade_bulk_insert_commits(logger, records): ) session.commit() except Exception as e: + session.rollback() if len(records) > 1: logger.error(f"Ran into issue when trying to insert commits \n Error: {e}") @@ -257,7 +259,14 @@ def facade_bulk_insert_commits(logger, records): commit_record = records[0] #replace incomprehensible dates with epoch. 
#2021-10-11 11:57:46 -0500 - placeholder_date = "1970-01-01 00:00:15 -0500" + + # placeholder_date = "1970-01-01 00:00:15 -0500" + placeholder_date = commit_record['author_timestamp'] + + # Reconstruct timezone portion of the date string to UTC + placeholder_date = re.split("[-+]", placeholder_date) + placeholder_date.pop() + placeholder_date = "-".join(placeholder_date) + "+0000" #Check for improper utc timezone offset #UTC timezone offset should be between -14:00 and +14:00 From e5efe332b5ba9dd75fbbbc53160ba0058d3f22dd Mon Sep 17 00:00:00 2001 From: John Strunk Date: Fri, 16 May 2025 14:46:09 -0400 Subject: [PATCH 2/6] Add cache symlink Add symlink out to the cache volume so that gunicorn is able to start since it can't write to the source directory in the container Signed-off-by: John Strunk --- docker/backend/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 16bfb415f8..bcf7380039 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -109,6 +109,7 @@ ENV SCORECARD_DIR=/scorecard COPY --from=golang-builder --chmod=u=rw,u+X,go=r,go+X "/scorecard" "/scorecard/scorecard" RUN mkdir -p repos/ logs/ /augur/facade/ +RUN ln -s /cache /augur/augur/static/cache COPY --chmod=u=rwx,go=rx ./docker/backend/entrypoint.sh / COPY --chmod=u=rwx,go=rx ./docker/backend/init.sh / From 12b8e6ad0fa845480175a4e3d644fb07ff273200 Mon Sep 17 00:00:00 2001 From: John Strunk Date: Thu, 15 May 2025 11:10:23 -0400 Subject: [PATCH 3/6] Update add smoketest to workflow Refactor GitHub workflow to build and push Docker images for backend, database, and rabbitmq services, and add smoke testing to ensure services start correctly. 
Signed-off-by: John Strunk --- .github/workflows/build_docker.yml | 136 +++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index fbce968697..63632207bc 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -5,9 +5,6 @@ on: - main - dev pull_request: - branches: - - main - - dev release: types: - published @@ -16,8 +13,131 @@ on: permissions: {} jobs: - build: + build-image: name: Build image + permissions: + contents: read + strategy: + matrix: + image: + - backend + - database + - keyman + - rabbitmq + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + id: setup-buildx + + - name: Build container + id: build + uses: docker/build-push-action@v6 + with: + context: . + file: ./docker/${{ matrix.image }}/Dockerfile + platforms: linux/amd64 + tags: ghcr.io/${{ github.repository_owner }}/augur_${{ matrix.image }}:test + cache-from: type=gha,scope=container-${{ matrix.image }} + cache-to: type=gha,scope=container-${{ matrix.image }},mode=min + outputs: type=docker,dest=/tmp/${{ matrix.image }}-image.tar + + - name: Save image as artifact + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.image }}-image + path: /tmp/${{ matrix.image }}-image.tar + + + + smoke-test: + name: Smoke test + needs: build-image + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Download image artifact (backend) + uses: actions/download-artifact@v4 + with: + name: backend-image + path: /tmp + + - name: Dowload image artifact (keyman) + uses: actions/download-artifact@v4 + with: + name: keyman-image + path: /tmp + + - name: Dowload image artifact (database) + uses: actions/download-artifact@v4 + with: + name: database-image + path: /tmp + + - name: Dowload image artifact 
(rabbitmq) + uses: actions/download-artifact@v4 + with: + name: rabbitmq-image + path: /tmp + + - name: Load images + run: | + docker load -i /tmp/backend-image.tar + docker load -i /tmp/database-image.tar + docker load -i /tmp/keyman-image.tar + docker load -i /tmp/rabbitmq-image.tar + + - name: Prepare compose file + run: | + yq eval -i '.services.augur.image = "ghcr.io/${{ github.repository_owner }}/augur_backend:test"' docker-compose.yml + yq eval -i '.services.augur.pull_policy = "never"' docker-compose.yml + yq eval -i '.services.augur.restart = "no"' docker-compose.yml + + yq eval -i '.services.augur-db.image = "ghcr.io/${{ github.repository_owner }}/augur_database:test"' docker-compose.yml + yq eval -i '.services.augur-db.pull_policy = "never"' docker-compose.yml + yq eval -i '.services.augur-db.restart = "no"' docker-compose.yml + + yq eval -i '.services.augur-keyman.image = "ghcr.io/${{ github.repository_owner }}/augur_keyman:test"' docker-compose.yml + yq eval -i '.services.augur-keyman.pull_policy = "never"' docker-compose.yml + yq eval -i '.services.augur-keyman.restart = "no"' docker-compose.yml + + yq eval -i '.services.rabbitmq.image = "ghcr.io/${{ github.repository_owner }}/augur_rabbitmq:test"' docker-compose.yml + yq eval -i '.services.rabbitmq.pull_policy = "never"' docker-compose.yml + yq eval -i '.services.rabbitmq.restart = "no"' docker-compose.yml + + - name: Setup Docker Compose + uses: docker/setup-compose-action@v1 + with: + version: latest + + - name: Start services & wait for output + run: | + docker compose -f docker-compose.yml up --no-build 2>&1 \ + | tee >(grep -q "Sending due task"; \ + docker compose -f docker-compose.yml down) + timeout-minutes: 3 + env: + AUGUR_GITLAB_USERNAME: dummy + AUGUR_GITLAB_API_KEY: dummy + AUGUR_GITHUB_USERNAME: dummy + AUGUR_GITHUB_API_KEY: ${{ secrets.GITHUB_TOKEN }} + + - name: Dump logs + if: always() # Always run this step to get logs even if the previous step fails + run: "docker run -t --rm -v 
augur_logs:/logs bash -c 'find /logs -type f | xargs tail -n +0'" + + + + push-image: + name: Push image + needs: smoke-test + # We don't push images on pull requests + if: github.event_name != 'pull_request' permissions: contents: read # to fetch code (actions/checkout) packages: write # to push docker image @@ -57,8 +177,9 @@ jobs: images: ghcr.io/${{ github.repository_owner }}/augur_${{ matrix.image }} # Pushes to the dev branch update the *:devel-latest tag # Releases update the *:latest tag and the *: tag + # Main does not update any tags tags: | - type=raw,value=devel-latest,enable=${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.ref == 'refs/heads/dev' }} + type=raw,value=devel-latest,enable=${{ github.ref == 'refs/heads/dev' }} type=raw,value=latest,enable=${{ github.event_name == 'release' }} type=raw,value=${{ github.event.release.tag_name }},enable=${{ github.event_name == 'release' }} @@ -72,7 +193,8 @@ jobs: labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64 # Only push if we've tagged the image in the metadata step - push: ${{ github.event_name != 'pull_request' && steps.meta.outputs.tags != '' }} + push: ${{ steps.meta.outputs.tags != '' }} tags: ${{ steps.meta.outputs.tags }} + # Use the same cache as the build step cache-from: type=gha,scope=container-${{ matrix.image }} - cache-to: type=gha,scope=container-${{ matrix.image }},mode=max + cache-to: type=gha,scope=container-${{ matrix.image }},mode=min From 7d34b544710832d1adc83f0797c241e4d47db416 Mon Sep 17 00:00:00 2001 From: John Strunk Date: Fri, 16 May 2025 16:57:49 -0400 Subject: [PATCH 4/6] Improve CI output matching Improve CI workflow to wait for specific log lines via a python script to match regex patterns from stdin. 
Signed-off-by: John Strunk --- .github/workflows/build_docker.yml | 27 +++++++++--- scripts/ci/await_all.py | 70 ++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 5 deletions(-) create mode 100755 scripts/ci/await_all.py diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index 63632207bc..bf23ecee23 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -115,17 +115,34 @@ jobs: with: version: latest + - name: Set up list of log lines to match + run: | + cat <<EOF > /tmp/regex_matches.txt + Gunicorn webserver started + Starting core worker processes + Starting secondary worker processes + Starting facade worker processes + Retrieved \\d+ github api keys for use + Fetching new repos \\(complete\\) + Inserting \\d+ contributors + Inserting \\d+ issues + Inserting prs of length: \\d+ + Querying committers count + Done generating scc data for repo + Sending due task + EOF + - name: Start services & wait for output run: | docker compose -f docker-compose.yml up --no-build 2>&1 \ - | tee >(grep -q "Sending due task"; \ - docker compose -f docker-compose.yml down) + | (./scripts/ci/await_all.py /tmp/regex_matches.txt \ + && docker compose -f docker-compose.yml down) timeout-minutes: 3 env: - AUGUR_GITLAB_USERNAME: dummy - AUGUR_GITLAB_API_KEY: dummy - AUGUR_GITHUB_USERNAME: dummy AUGUR_GITHUB_API_KEY: ${{ secrets.GITHUB_TOKEN }} + AUGUR_GITHUB_USERNAME: dummy + AUGUR_GITLAB_API_KEY: dummy + AUGUR_GITLAB_USERNAME: dummy - name: Dump logs if: always() # Always run this step to get logs even if the previous step fails diff --git a/scripts/ci/await_all.py b/scripts/ci/await_all.py new file mode 100755 index 0000000000..2da8590f39 --- /dev/null +++ b/scripts/ci/await_all.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +""" +await_all.py: Waits for all regex patterns to match from stdin input. + +Usage: + python await_all.py <patterns_file> + +- <patterns_file> should be a text file with one regex pattern per line.
+- Reads lines from stdin, echoing each line to stdout. +- Prints a match message to stdout when a pattern matches a line. +- Exits with success (0) when all patterns have matched at least once. +- Exits with failure (1) if end of input is reached and not all patterns matched. + +Example: + python await_all.py patterns.txt < input.txt + + where patterns.txt contains: + ^ERROR + ^WARNING + ^INFO + + and input.txt contains: + INFO: All systems operational + ... other lines ... + WARNING: Low disk space + ERROR: Disk failure +""" + +import sys +import re + +if len(sys.argv) != 2: + print(f"Usage: {sys.argv[0]} <patterns_file>", file=sys.stderr) + sys.exit(2) + +patterns_file = sys.argv[1] + +# Read regex patterns from the file +with open(patterns_file, 'r') as f: + pattern_lines = [line.strip() for line in f if line.strip()] + +if not pattern_lines: + print("No patterns found in the file.", file=sys.stderr) + sys.exit(2) + +patterns = [(i, re.compile(p)) for i, p in enumerate(pattern_lines)] +matched = set() + +try: + for line in sys.stdin: + print(line, end='') # Copy to stdout immediately + for idx, regex in patterns: + if regex.search(line): + print(f"✅✅✅ MATCH pattern {idx+1}: '{pattern_lines[idx]}'") + matched.add(idx) + if len(matched) == len(patterns): + # All patterns matched, exit early + break +except KeyboardInterrupt: + pass + +unmatched = [pattern_lines[i] for i in range(len(patterns)) if i not in matched] +if unmatched: + print("❌❌❌❌❌ Did not match all patterns. ❌❌❌❌❌\nUnmatched patterns:") + for p in unmatched: + print(f"❌ - {p}") + sys.exit(1) +else: + print("✅✅✅✅✅ All patterns matched. ✅✅✅✅✅") + sys.exit(0) From a814e57de6a5e4a358e6e72ae03468c361b9989d Mon Sep 17 00:00:00 2001 From: John Strunk Date: Mon, 19 May 2025 11:29:17 -0400 Subject: [PATCH 5/6] Optimize build workflow Replaced build-image job with test-e2e job that builds and loads all the containers directly. This saves substantial time by avoiding upload/download of image artifacts. 
Since all the images except backend are tiny, the lack of build parallelism doesn't matter. Signed-off-by: John Strunk --- .github/workflows/build_docker.yml | 106 ++++++++++++----------------- 1 file changed, 44 insertions(+), 62 deletions(-) diff --git a/.github/workflows/build_docker.yml b/.github/workflows/build_docker.yml index bf23ecee23..f99831da32 100644 --- a/.github/workflows/build_docker.yml +++ b/.github/workflows/build_docker.yml @@ -13,17 +13,8 @@ on: permissions: {} jobs: - build-image: - name: Build image - permissions: - contents: read - strategy: - matrix: - image: - - backend - - database - - keyman - - rabbitmq + test-e2e: + name: End-to-end test runs-on: ubuntu-latest steps: - name: Checkout repository @@ -33,64 +24,49 @@ jobs: uses: docker/setup-buildx-action@v3 id: setup-buildx - - name: Build container - id: build + - name: Build database container uses: docker/build-push-action@v6 with: context: . - file: ./docker/${{ matrix.image }}/Dockerfile + file: ./docker/database/Dockerfile platforms: linux/amd64 - tags: ghcr.io/${{ github.repository_owner }}/augur_${{ matrix.image }}:test - cache-from: type=gha,scope=container-${{ matrix.image }} - cache-to: type=gha,scope=container-${{ matrix.image }},mode=min - outputs: type=docker,dest=/tmp/${{ matrix.image }}-image.tar - - - name: Save image as artifact - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.image }}-image - path: /tmp/${{ matrix.image }}-image.tar - - - - smoke-test: - name: Smoke test - needs: build-image - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Download image artifact (backend) - uses: actions/download-artifact@v4 - with: - name: backend-image - path: /tmp + tags: ghcr.io/${{ github.repository_owner }}/augur_database:test + cache-from: type=gha,scope=container-database + cache-to: type=gha,scope=container-database,mode=min + load: true - - name: Dowload image artifact (keyman) - uses: 
actions/download-artifact@v4 + - name: Build keyman container + uses: docker/build-push-action@v6 with: - name: keyman-image - path: /tmp + context: . + file: ./docker/keyman/Dockerfile + platforms: linux/amd64 + tags: ghcr.io/${{ github.repository_owner }}/augur_keyman:test + cache-from: type=gha,scope=container-keyman + cache-to: type=gha,scope=container-keyman,mode=min + load: true - - name: Dowload image artifact (database) - uses: actions/download-artifact@v4 + - name: Build rabbitmq container + uses: docker/build-push-action@v6 with: - name: database-image - path: /tmp + context: . + file: ./docker/rabbitmq/Dockerfile + platforms: linux/amd64 + tags: ghcr.io/${{ github.repository_owner }}/augur_rabbitmq:test + cache-from: type=gha,scope=container-rabbitmq + cache-to: type=gha,scope=container-rabbitmq,mode=min + load: true - - name: Dowload image artifact (rabbitmq) - uses: actions/download-artifact@v4 + - name: Build backend container + uses: docker/build-push-action@v6 with: - name: rabbitmq-image - path: /tmp - - - name: Load images - run: | - docker load -i /tmp/backend-image.tar - docker load -i /tmp/database-image.tar - docker load -i /tmp/keyman-image.tar - docker load -i /tmp/rabbitmq-image.tar + context: . + file: ./docker/backend/Dockerfile + platforms: linux/amd64 + tags: ghcr.io/${{ github.repository_owner }}/augur_backend:test + cache-from: type=gha,scope=container-backend + cache-to: type=gha,scope=container-backend,mode=min + load: true - name: Prepare compose file run: | @@ -133,6 +109,10 @@ jobs: EOF - name: Start services & wait for output + # This starts the system and sends the output to "await_all.py" which + # scans for the regex matches from above. Once all matches are seen at + # least once, the `compose down` will run to shut down the system. If + # this all doesn't happen before the timeout, the job will fail. 
run: | docker compose -f docker-compose.yml up --no-build 2>&1 \ | (./scripts/ci/await_all.py /tmp/regex_matches.txt \ @@ -140,19 +120,21 @@ jobs: timeout-minutes: 3 env: AUGUR_GITHUB_API_KEY: ${{ secrets.GITHUB_TOKEN }} - AUGUR_GITHUB_USERNAME: dummy + AUGUR_GITHUB_USERNAME: ${{ github.repository_owner }} AUGUR_GITLAB_API_KEY: dummy AUGUR_GITLAB_USERNAME: dummy - name: Dump logs - if: always() # Always run this step to get logs even if the previous step fails + # Always run this step to get logs, even if the previous step fails + if: always() + # We use tail so that we can see the name of each file as it's printed run: "docker run -t --rm -v augur_logs:/logs bash -c 'find /logs -type f | xargs tail -n +0'" push-image: name: Push image - needs: smoke-test + needs: test-e2e # We don't push images on pull requests if: github.event_name != 'pull_request' permissions: From 9e3a5912b72b53da5e0ec8990605ada369aa19cf Mon Sep 17 00:00:00 2001 From: John Strunk Date: Wed, 21 May 2025 09:33:58 -0400 Subject: [PATCH 6/6] Fix database env vars Fixed warning about "Legacy key/value format with whitespace separator should not be used" in Dockerfiles Signed-off-by: John Strunk --- docker/database/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/database/Dockerfile b/docker/database/Dockerfile index 9d677fd2be..f5f954749c 100644 --- a/docker/database/Dockerfile +++ b/docker/database/Dockerfile @@ -4,9 +4,9 @@ FROM postgres:16 LABEL maintainer="outdoors@acm.org" LABEL version="0.86.1" -ENV POSTGRES_DB "test" -ENV POSTGRES_USER "augur" -ENV POSTGRES_PASSWORD "augur" +ENV POSTGRES_DB="test" +ENV POSTGRES_USER="augur" +ENV POSTGRES_PASSWORD="augur" EXPOSE 5432