From 9dfae8c0db07ea7f5f60cc5a20c45749d396eb7c Mon Sep 17 00:00:00 2001 From: Lee Penkman Date: Sat, 7 Jun 2025 13:57:25 +1200 Subject: [PATCH 1/2] Update CI for Python 3.12 and fix websockets --- .github/workflows/ci.yml | 54 ++++++++++++++++++ .pre-commit-config.yaml | 5 ++ pytest.ini | 20 ++++++- requirements-test.txt | 5 +- requirements.in | 3 +- requirements.txt | 2 +- ruff.toml | 2 + .../{integ => integration}/cutoff_example.py | 0 .../data/f2bjrop1.0.wav | Bin .../questions/payments/test_payments.py | 0 tests/{ => integration}/test_ai_wrapper.py | 7 ++- .../test_audio_length_limit.py | 2 + .../{integ => integration}/test_bad_cutoff.py | 3 + .../test_inference_server.py | 3 + .../test_inference_server_speech.py | 3 + tests/{ => integration}/test_main.py | 3 + tests/{ => integration}/test_main_unit.py | 3 + tests/performance/test_e2e_perf.py | 2 + tests/test_doc_api.py | 8 +++ tests/unit/questions/test_link_enricher.py | 7 ++- tests/unit/test_perplexity.py | 17 +++++- tests/unit/test_post_process_results.py | 7 +++ tests/unit/test_summarization.py | 5 ++ 23 files changed, 148 insertions(+), 13 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 .pre-commit-config.yaml create mode 100644 ruff.toml rename tests/{integ => integration}/cutoff_example.py (100%) rename tests/{integ => integration}/data/f2bjrop1.0.wav (100%) rename tests/{integ => integration}/questions/payments/test_payments.py (100%) rename tests/{ => integration}/test_ai_wrapper.py (81%) rename tests/{integ => integration}/test_audio_length_limit.py (95%) rename tests/{integ => integration}/test_bad_cutoff.py (96%) rename tests/{integ => integration}/test_inference_server.py (98%) rename tests/{integ => integration}/test_inference_server_speech.py (97%) rename tests/{ => integration}/test_main.py (98%) rename tests/{ => integration}/test_main_unit.py (99%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..718c08f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,54 @@ +name: CI + +on: + push: + branches: ["main"] + pull_request: + +jobs: + unit: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.11', '3.12'] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Cache uv + uses: actions/cache@v3 + with: + path: ~/.cache/uv + key: ${{ runner.os }}-uv-${{ hashFiles('requirements.txt', 'requirements-test.txt') }} + - name: Install uv + run: pip install uv + - name: Install dependencies + run: uv pip install --system -r requirements.txt -r requirements-test.txt + - name: Ruff + run: ruff . + - name: Run unit tests + run: pytest --cov=questions --cov-report=xml -q + - name: Upload coverage + uses: actions/upload-artifact@v4 + with: + name: coverage-xml + path: coverage.xml + integration: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Cache uv + uses: actions/cache@v3 + with: + path: ~/.cache/uv + key: ${{ runner.os }}-uv-${{ hashFiles('requirements.txt', 'requirements-test.txt') }} + - name: Install uv + run: pip install uv + - name: Install dependencies + run: uv pip install --system -r requirements.txt -r requirements-test.txt + - name: Run integration tests + run: pytest -m integration -q diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a09a638 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.0.5 + hooks: + - id: ruff diff --git a/pytest.ini b/pytest.ini index f615d53..91b4614 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,7 +1,21 @@ -[tool:pytest] -norecursedirs = '.*', 'build', 'dist', 'CVS', '_darcs', '{arch}', '*.egg', 'static', "models", "templates", "kuber", gameon questions/tools - [pytest] +norecursedirs = + .* + build + dist + CVS + _darcs + {arch} + *.egg + static + models + templates + kuber + gameon + questions/tools + tests/integration + tests/performance + asyncio_mode=auto pythonpath = . diff --git a/requirements-test.txt b/requirements-test.txt index 0a3cf06..60c217c 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,5 +1,4 @@ pytest==7.3.1 -httpx==0.24.0 -fastapi==0.95.2 pytest-cov==4.1.0 -colorama==0.4.6 +ruff +httpx diff --git a/requirements.in b/requirements.in index 24b8e51..07efcde 100755 --- a/requirements.in +++ b/requirements.in @@ -25,8 +25,9 @@ google-cloud-storage google-cloud-ndb #jinja2 +# we need websockets <11 for pyppeteer compatibility jinja2 -websockets +websockets<11 #nltk nltk stripe diff --git a/requirements.txt b/requirements.txt index 6e72ee2..a1a1ac8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -194,7 +194,7 @@ urllib3==2.2.3 # via requests uvicorn==0.34.2 # via -r requirements.in -websockets==12.0 +websockets==10.4 # via -r requirements.in wrapt==1.11.2 # via diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..6801538 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,2 @@ +line-length = 120 +exclude = [".venv"] diff --git a/tests/integ/cutoff_example.py b/tests/integration/cutoff_example.py similarity index 100% rename from tests/integ/cutoff_example.py rename to tests/integration/cutoff_example.py diff --git a/tests/integ/data/f2bjrop1.0.wav b/tests/integration/data/f2bjrop1.0.wav similarity index 100% rename from tests/integ/data/f2bjrop1.0.wav rename to tests/integration/data/f2bjrop1.0.wav diff --git a/tests/integ/questions/payments/test_payments.py b/tests/integration/questions/payments/test_payments.py similarity index 100% rename from tests/integ/questions/payments/test_payments.py rename to tests/integration/questions/payments/test_payments.py diff --git a/tests/test_ai_wrapper.py b/tests/integration/test_ai_wrapper.py similarity index 81% rename from tests/test_ai_wrapper.py rename to tests/integration/test_ai_wrapper.py index c60f4fc..33603e2 100644 --- a/tests/test_ai_wrapper.py +++ b/tests/integration/test_ai_wrapper.py @@ -1,12 +1,15 @@ import pytest + +pytestmark = pytest.mark.integration + from questions.ai_wrapper import generate_with_claude @pytest.mark.asyncio async def test_generate_with_claude(): prompt = "What is the capital of France?" response = await generate_with_claude(prompt) - + assert response is not None assert isinstance(response, str) assert len(response) > 0 - assert "Paris" in response \ No newline at end of file + assert "Paris" in response diff --git a/tests/integ/test_audio_length_limit.py b/tests/integration/test_audio_length_limit.py similarity index 95% rename from tests/integ/test_audio_length_limit.py rename to tests/integration/test_audio_length_limit.py index 9d600f3..14dc2fe 100644 --- a/tests/integ/test_audio_length_limit.py +++ b/tests/integration/test_audio_length_limit.py @@ -1,6 +1,8 @@ import os import pytest +pytestmark = pytest.mark.integration + from starlette.testclient import TestClient from questions.inference_server.inference_server import app, audio_process diff --git a/tests/integ/test_bad_cutoff.py b/tests/integration/test_bad_cutoff.py similarity index 96% rename from tests/integ/test_bad_cutoff.py rename to tests/integration/test_bad_cutoff.py index 20a8a13..aeddc44 100644 --- a/tests/integ/test_bad_cutoff.py +++ b/tests/integration/test_bad_cutoff.py @@ -1,5 +1,8 @@ import requests import os +import pytest + +pytestmark = pytest.mark.integration from sellerinfo import TEXT_GENERATOR_SECRET diff --git a/tests/integ/test_inference_server.py b/tests/integration/test_inference_server.py similarity index 98% rename from tests/integ/test_inference_server.py rename to tests/integration/test_inference_server.py index 1bf3113..cc50306 100644 --- a/tests/integ/test_inference_server.py +++ b/tests/integration/test_inference_server.py @@ -1,5 +1,8 @@ import dataclasses import os +import pytest + +pytestmark = pytest.mark.integration from fastapi import UploadFile diff --git a/tests/integ/test_inference_server_speech.py b/tests/integration/test_inference_server_speech.py similarity index 97% rename from tests/integ/test_inference_server_speech.py rename to tests/integration/test_inference_server_speech.py index 42e9e9f..89d964a 100644 --- a/tests/integ/test_inference_server_speech.py +++ b/tests/integration/test_inference_server_speech.py @@ -1,4 +1,7 @@ import os +import pytest + +pytestmark = pytest.mark.integration from questions.utils import log_time diff --git a/tests/test_main.py b/tests/integration/test_main.py similarity index 98% rename from tests/test_main.py rename to tests/integration/test_main.py index 8e02649..35020a1 100755 --- a/tests/test_main.py +++ b/tests/integration/test_main.py @@ -1,4 +1,7 @@ import dataclasses +import pytest + +pytestmark = pytest.mark.integration from starlette.testclient import TestClient diff --git a/tests/test_main_unit.py b/tests/integration/test_main_unit.py similarity index 99% rename from tests/test_main_unit.py rename to tests/integration/test_main_unit.py index cee9078..9159f19 100755 --- a/tests/test_main_unit.py +++ b/tests/integration/test_main_unit.py @@ -1,4 +1,7 @@ import pytest + +pytestmark = pytest.mark.integration + from starlette.testclient import TestClient from questions.inference_server.inference_server import app, generate_route, openai_route diff --git a/tests/performance/test_e2e_perf.py b/tests/performance/test_e2e_perf.py index 9557958..d7895e9 100755 --- a/tests/performance/test_e2e_perf.py +++ b/tests/performance/test_e2e_perf.py @@ -1,6 +1,8 @@ import traceback import pytest + +pytestmark = pytest.mark.integration import requests import logging from questions.logging_config import setup_logging diff --git a/tests/test_doc_api.py b/tests/test_doc_api.py index a015f0c..8ac1460 100644 --- a/tests/test_doc_api.py +++ b/tests/test_doc_api.py @@ -1,6 +1,14 @@ import json from unittest.mock import patch, MagicMock, AsyncMock import pytest +import os + +if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"): + pytest.skip("integration test requires Google credentials", allow_module_level=True) + +pytest.importorskip("google.cloud.ndb", reason="google cloud ndb required for document api tests") + +pytestmark = pytest.mark.integration from main import list_documents, get_document, save_document, autosave_document diff --git a/tests/unit/questions/test_link_enricher.py b/tests/unit/questions/test_link_enricher.py index 7dc3c61..b0a023a 100644 --- a/tests/unit/questions/test_link_enricher.py +++ b/tests/unit/questions/test_link_enricher.py @@ -1,7 +1,12 @@ -from questions.link_enricher import get_urls, enrich_links import logging +import pytest + +bs4 = pytest.importorskip("bs4", reason="bs4 required for link enrichment tests") +from questions.link_enricher import get_urls, enrich_links from questions.logging_config import setup_logging +pytestmark = pytest.mark.integration + setup_logging() logger = logging.getLogger(__name__) diff --git a/tests/unit/test_perplexity.py b/tests/unit/test_perplexity.py index d9c7264..fd23f83 100644 --- a/tests/unit/test_perplexity.py +++ b/tests/unit/test_perplexity.py @@ -1,11 +1,24 @@ -from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2TokenizerFast, BloomTokenizerFast, BloomForCausalLM +import pytest + +transformers = pytest.importorskip( + "transformers", reason="transformers is required for perplexity tests" +) +from transformers import ( + GPT2LMHeadModel, + GPT2Tokenizer, + GPT2TokenizerFast, + BloomTokenizerFast, + BloomForCausalLM, +) import logging from questions.logging_config import setup_logging +pytestmark = pytest.mark.integration + setup_logging() logger = logging.getLogger(__name__) -import torch +torch = pytest.importorskip("torch", reason="torch is required for perplexity tests") from questions.perplexity import get_perplexity diff --git a/tests/unit/test_post_process_results.py b/tests/unit/test_post_process_results.py index f4f84bf..9a58bea 100755 --- a/tests/unit/test_post_process_results.py +++ b/tests/unit/test_post_process_results.py @@ -1,5 +1,12 @@ +import pytest + +transformers = pytest.importorskip( + "transformers", reason="transformers required for post-process tests" +) from transformers import AutoTokenizer +pytestmark = pytest.mark.integration + from questions.models import GenerateParams from questions.post_process_results import post_process_results diff --git a/tests/unit/test_summarization.py b/tests/unit/test_summarization.py index 06c08da..755df60 100644 --- a/tests/unit/test_summarization.py +++ b/tests/unit/test_summarization.py @@ -1,6 +1,11 @@ +import pytest +pytest.importorskip("torch", reason="torch required for summarization tests") from questions.inference_server.inference_server import MODEL_CACHE from questions.summarization import get_extractive_summary from questions.utils import log_time +import pytest + +pytestmark = pytest.mark.integration text = """ # classification = summarizer("James Joseph Norton is an American comedian, radio personality, actor, author, and television and podcast host. Norton has been the co-host of the podcast UFC Unfiltered with Matt Serra and the morning radio show Jim Norton & Sam Roberts on SiriusXM Radio since 2016, and The Chip Chipperson Podacast since 2017. He gained initial prominence as third mic on the radio show Opie and Anthony, with Gregg \"Opie\" Hughes and Anthony Cumia, from 2001 to 2014. After becoming a stand-up comedian in 1990, Norton spent his early years developing his act. His appearances on The Louie Show caught the attention of comedian Andrew Dice Clay in 1997, who chose Norton to open for him for his shows. In 2000, Norton made his debut on Opie and Anthony and joined the show as a third mic in 2001 which increased his national exposure. He went on to have a recurring role on the sitcom Lucky Louie and featured as a regular panellist on Tough Crowd with Colin Quinn. Since he joined SiriusXM with Opie and Anthony in 2004, Norton hosted Opie with Jim Norton from 2014 to 2016, and The Jim Norton Advice Show. Since 2003, Norton has released four comedy albums and seven comedy specials, including three on Epix and one on Netflix. In 2014, Norton hosted The Jim Norton Show, a talk show on Vice.") From 93811cae1afeba8f22ea5e13f28ff14e17ee6019 Mon Sep 17 00:00:00 2001 From: Lee Penkman Date: Sat, 7 Jun 2025 14:06:56 +1200 Subject: [PATCH 2/2] Add developer helpers --- .github/workflows/ci.yml | 26 ++++++++++++++++++- .gitignore | 1 + .pre-commit-config.yaml | 7 +++++ Makefile | 13 ++++++++++ README.md | 10 +++++++ pytest.ini | 9 ++++--- requirements-test.txt | 1 + tests/integration/test_ai_wrapper.py | 2 +- tests/integration/test_bad_cutoff.py | 4 ++- tests/integration/test_inference_server.py | 2 +- .../test_inference_server_speech.py | 2 +- tests/performance/test_e2e_perf.py | 2 +- tests/test_doc_api.py | 2 +- 13 files changed, 70 insertions(+), 11 deletions(-) create mode 100644 Makefile diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 718c08f..a4e9335 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,8 +34,30 @@ jobs: with: name: coverage-xml path: coverage.xml + offline-integration: + runs-on: ubuntu-latest + needs: unit + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Cache uv + uses: actions/cache@v3 + with: + path: ~/.cache/uv + key: ${{ runner.os }}-uv-${{ hashFiles('requirements.txt', 'requirements-test.txt') }} + - name: Install uv + run: pip install uv + - name: Install dependencies + run: uv pip install --system -r requirements.txt -r requirements-test.txt + - name: Download NLTK data + run: python -m nltk.downloader punkt + - name: Run offline integration tests + run: pytest -m "integration and not internet" -q integration: runs-on: ubuntu-latest + needs: offline-integration steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -50,5 +72,7 @@ jobs: run: pip install uv - name: Install dependencies run: uv pip install --system -r requirements.txt -r requirements-test.txt + - name: Download NLTK data + run: python -m nltk.downloader punkt - name: Run integration tests - run: pytest -m integration -q + run: pytest -m "integration and internet" -q diff --git a/.gitignore b/.gitignore index a54d7ca..3638678 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,4 @@ __pycache__ flagged mainserver.log +nltk_data/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a09a638..867cee2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,3 +3,10 @@ repos: rev: v0.0.5 hooks: - id: ruff + - repo: local + hooks: + - id: pytest + name: pytest + entry: pytest -q + language: system + pass_filenames: false diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..66a0c4a --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +.PHONY: lint test offline-integration integration + +lint: + ruff check . + +test: + pytest -q + +offline-integration: + pytest -m "integration and not internet" -q || true + +integration: + pytest -m "integration and internet" -q || true diff --git a/README.md b/README.md index 73116ab..f5cb749 100755 --- a/README.md +++ b/README.md @@ -83,6 +83,16 @@ Using cuda is important to speed up inference. python -m nltk.downloader punkt ``` +### Running offline integration tests + +Offline integration tests exercise functionality that does not require internet +access but may load heavy dependencies. After installing the `punkt` dataset +you can run them with: + +```shell +pytest -m "integration and not internet" +``` + Set up some environment variables in this file (fake ones are okay for local dev) ```shell diff --git a/pytest.ini b/pytest.ini index 91b4614..c5fd463 100644 --- a/pytest.ini +++ b/pytest.ini @@ -12,9 +12,7 @@ norecursedirs = templates kuber gameon - questions/tools - tests/integration - tests/performance + questions/tools asyncio_mode=auto pythonpath = . @@ -24,6 +22,9 @@ testpaths = tests # cwd workdir = . -addopts = -s -v +addopts = -s -v --ignore=tests/integration --ignore=tests/performance +markers = + integration: integration tests + internet: tests that require internet access env = GOOGLE_APPLICATION_CREDENTIALS = secrets/google-credentials.json diff --git a/requirements-test.txt b/requirements-test.txt index 60c217c..8a1e38e 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,3 +2,4 @@ pytest==7.3.1 pytest-cov==4.1.0 ruff httpx +colorama diff --git a/tests/integration/test_ai_wrapper.py b/tests/integration/test_ai_wrapper.py index 33603e2..bae8c50 100644 --- a/tests/integration/test_ai_wrapper.py +++ b/tests/integration/test_ai_wrapper.py @@ -1,6 +1,6 @@ import pytest -pytestmark = pytest.mark.integration +pytestmark = [pytest.mark.integration, pytest.mark.internet] from questions.ai_wrapper import generate_with_claude diff --git a/tests/integration/test_bad_cutoff.py b/tests/integration/test_bad_cutoff.py index aeddc44..5532049 100644 --- a/tests/integration/test_bad_cutoff.py +++ b/tests/integration/test_bad_cutoff.py @@ -2,7 +2,9 @@ import os import pytest -pytestmark = pytest.mark.integration +pytestmark = [pytest.mark.integration, pytest.mark.internet] + +pytest.skip("manual script", allow_module_level=True) from sellerinfo import TEXT_GENERATOR_SECRET diff --git a/tests/integration/test_inference_server.py b/tests/integration/test_inference_server.py index cc50306..b78758a 100644 --- a/tests/integration/test_inference_server.py +++ b/tests/integration/test_inference_server.py @@ -2,7 +2,7 @@ import os import pytest -pytestmark = pytest.mark.integration +pytestmark = [pytest.mark.integration, pytest.mark.internet] from fastapi import UploadFile diff --git a/tests/integration/test_inference_server_speech.py b/tests/integration/test_inference_server_speech.py index 89d964a..5514f57 100644 --- a/tests/integration/test_inference_server_speech.py +++ b/tests/integration/test_inference_server_speech.py @@ -1,7 +1,7 @@ import os import pytest -pytestmark = pytest.mark.integration +pytestmark = [pytest.mark.integration, pytest.mark.internet] from questions.utils import log_time diff --git a/tests/performance/test_e2e_perf.py b/tests/performance/test_e2e_perf.py index d7895e9..60eb7d1 100755 --- a/tests/performance/test_e2e_perf.py +++ b/tests/performance/test_e2e_perf.py @@ -2,7 +2,7 @@ import pytest -pytestmark = pytest.mark.integration +pytestmark = [pytest.mark.integration, pytest.mark.internet] import requests import logging from questions.logging_config import setup_logging diff --git a/tests/test_doc_api.py b/tests/test_doc_api.py index 8ac1460..d0768e4 100644 --- a/tests/test_doc_api.py +++ b/tests/test_doc_api.py @@ -8,7 +8,7 @@ pytest.importorskip("google.cloud.ndb", reason="google cloud ndb required for document api tests") -pytestmark = pytest.mark.integration +pytestmark = [pytest.mark.integration, pytest.mark.internet] from main import list_documents, get_document, save_document, autosave_document