Split Python CI into non-integration, Ollama and integration jobs.

Tests are selected with pytest markers; tools/ut.sh reads
PYTEST_SKIP_MARKERS / PYTEST_ONLY_MARKERS / PYTEST_VERBOSE from the environment.

Notes on this revision of the patch:
 * Both the "integration" and "ollama" markers are registered in pyproject.toml.
 * PYTEST_SKIP_MARKERS is negated as a parenthesised group, so a compound
   expression such as "integration or ollama" is excluded as a whole.
 * The non-integration job skips ollama tests as well; they have their own job.
 * The Ollama server is auto-started only when OLLAMA_AUTO_SETUP is set
   (done by the ollama job). The generic CI variable is present in every
   GitHub Actions job and module-level code runs at collection time, so
   gating on CI alone would start the server in every Linux job.
 * Modules that set `pytestmark` do not repeat the same mark per test.

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 35eed124..9c6a95de 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -66,7 +66,7 @@ jobs:
         run: tools/ut.sh -j
 
   python_tests:
-    name: python tests on ${{ matrix.os }} ${{ matrix.python-version}}
+    name: python tests (non-integration) on ${{ matrix.os }} ${{ matrix.python-version}}
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -88,8 +88,68 @@ jobs:
         uses: astral-sh/setup-uv@v4
         with:
           version: "latest"
-      - name: Run Python Tests
+      - name: Run Python Tests (excluding Integration)
         run: tools/ut.sh -p
+        env:
+          PYTEST_SKIP_MARKERS: "integration or ollama"
+
+  ollama_integration_tests:
+    name: ollama integration tests (linux python 3.10)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install java
+        uses: actions/setup-java@v4
+        with:
+          java-version: '11'
+          distribution: 'adopt'
+      - name: Install python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+      - name: Run Ollama Integration Tests
+        run: tools/ut.sh -p
+        env:
+          PYTEST_ONLY_MARKERS: "ollama"
+          PYTEST_VERBOSE: "true"
+          OLLAMA_AUTO_SETUP: "true"
+          CI: true
+      - name: Upload Ollama logs on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ollama-logs
+          path: /tmp/ollama*.log
+          retention-days: 7
+
+  integration_tests:
+    name: integration tests (linux python 3.10)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install java
+        uses: actions/setup-java@v4
+        with:
+          java-version: '11'
+          distribution: 'adopt'
+      - name: Install python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+      - name: Run Integration Tests
+        run: tools/ut.sh -p
+        env:
+          PYTEST_ONLY_MARKERS: "integration"
+          PYTEST_VERBOSE: "true"
+          CI: true
 
   e2e_tests:
     name: e2e tests on ${{ matrix.os }} ${{ matrix.python-version}}
diff --git a/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py b/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py
index 95a77e5c..e86bafb1 100644
--- a/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py
+++ b/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py
@@ -27,7 +27,10 @@
 )
 from flink_agents.plan.tools.function_tool import from_callable
 
-test_model = os.environ.get("TEST_MODEL")
+# Mark all tests in this module as integration tests
+pytestmark = pytest.mark.integration
+
+test_model = os.environ.get("TEST_MODEL", "claude-3-haiku-20240307")
 api_key = os.environ.get("TEST_API_KEY")
 
 
diff --git a/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py b/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py
index dfd67eae..a4f7d4e8 100644
--- a/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py
+++ b/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py
@@ -27,7 +27,10 @@
 )
 from flink_agents.plan.tools.function_tool import from_callable
 
-test_model = os.environ.get("TEST_MODEL")
+# Mark all tests in this module as integration tests
+pytestmark = pytest.mark.integration
+
+test_model = os.environ.get("TEST_MODEL", "gpt-3.5-turbo")
 api_key = os.environ.get("TEST_API_KEY")
 api_base_url = os.environ.get("TEST_API_BASE_URL")
 
diff --git a/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py b/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py
index 653fb76b..c0b157ac 100644
--- a/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py
+++ b/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py
@@ -32,12 +32,15 @@
 )
 from flink_agents.plan.tools.function_tool import FunctionTool, from_callable
 
+# Mark all tests in this module as ollama tests
+pytestmark = pytest.mark.ollama
+
 test_model = os.environ.get("OLLAMA_CHAT_MODEL", "qwen3:0.6b")
 current_dir = Path(__file__).parent
 
 try:
-    # only auto setup ollama in ci with python 3.10 to reduce ci cost.
-    if "3.10" in sys.version:
+    # Start ollama only when opted in; plain CI is set in every GitHub Actions job.
+    if os.environ.get("OLLAMA_AUTO_SETUP") and sys.platform == "linux":
         subprocess.run(
             ["bash", f"{current_dir}/start_ollama_server.sh"], timeout=300, check=True
         )
@@ -60,7 +63,9 @@
     client is None, reason="Ollama client is not available or test model is missing"
 )
 def test_ollama_chat() -> None:  # noqa :D103
-    server = OllamaChatModelConnection(name="ollama", request_timeout=120.0)
+    # Use longer timeout in CI environment (slower resources)
+    request_timeout = 120.0 if os.environ.get("CI") else 30.0
+    server = OllamaChatModelConnection(name="ollama", request_timeout=request_timeout)
     response = server.chat(
         [ChatMessage(role=MessageRole.USER, content="Hello!")], model=test_model
     )
@@ -94,7 +99,9 @@ def get_tool(name: str, type: ResourceType) -> FunctionTool:  # noqa :D103
     client is None, reason="Ollama client is not available or test model is missing"
 )
 def test_ollama_chat_with_tools() -> None:  # noqa :D103
-    connection = OllamaChatModelConnection(name="ollama", request_timeout=120.0)
+    # Use longer timeout for tool calling in CI environment (slower resources)
+    request_timeout = 120.0 if os.environ.get("CI") else 30.0
+    connection = OllamaChatModelConnection(name="ollama", request_timeout=request_timeout)
 
     def get_resource(name: str, type: ResourceType) -> Resource:
         if type == ResourceType.TOOL:
diff --git a/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py b/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py
index 3dfc6e01..4135cded 100644
--- a/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py
+++ b/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py
@@ -29,12 +29,15 @@
     OllamaEmbeddingModelSetup,
 )
 
+# Mark all tests in this module as ollama tests
+pytestmark = pytest.mark.ollama
+
 test_model = os.environ.get("OLLAMA_EMBEDDING_MODEL", "all-minilm:22m")
 current_dir = Path(__file__).parent
 
 try:
-    # only auto setup ollama in ci with python 3.10 to reduce ci cost.
-    if "3.10" in sys.version:
+    # Start ollama only when opted in; plain CI is set in every GitHub Actions job.
+    if os.environ.get("OLLAMA_AUTO_SETUP") and sys.platform == "linux":
         subprocess.run(
             ["bash", f"{current_dir}/start_ollama_server.sh"], timeout=300, check=True
         )
@@ -58,9 +61,12 @@
 )
 def test_ollama_embedding_setup() -> None:
     """Test embedding functionality with OllamaEmbeddingModelSetup."""
+    # Use longer timeout for embedding in CI environment (slower resources)
+    request_timeout = 120.0 if os.environ.get("CI") else 30.0
     connection = OllamaEmbeddingModelConnection(
         name="ollama_embed",
-        base_url="http://localhost:11434"
+        base_url="http://localhost:11434",
+        request_timeout=request_timeout
     )
 
     def get_resource(name: str, type: ResourceType) -> Resource:
diff --git a/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py b/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py
index 3f50736c..49e9d6cf 100644
--- a/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py
+++ b/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py
@@ -25,14 +25,17 @@
     OpenAIEmbeddingModelSetup,
 )
 
+# Mark all tests in this module as integration tests
+pytestmark = pytest.mark.integration
+
 test_model = os.environ.get("TEST_EMBEDDING_MODEL", "text-embedding-3-small")
 api_key = os.environ.get("TEST_API_KEY")
 
 
 @pytest.mark.skipif(api_key is None, reason="TEST_API_KEY is not set")
 def test_openai_embedding_model() -> None:  # noqa: D103
     connection = OpenAIEmbeddingModelConnection(
-        name="openai", api_key=api_key
+        name="openai", api_key=api_key or "fake-key"
     )
 
     def get_resource(name: str, type: ResourceType) -> Resource:
diff --git a/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py b/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py
index 233c6b6a..2b468441 100644
--- a/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py
+++ b/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py
@@ -20,13 +20,6 @@
 
 import pytest
 
-try:
-    import chromadb  # noqa: F401
-
-    chromadb_available = True
-except ImportError:
-    chromadb_available = False
-
 from flink_agents.api.resource import Resource, ResourceType
 from flink_agents.api.vector_stores.vector_store import (
     VectorStoreQuery,
@@ -35,12 +28,25 @@
     ChromaVectorStore,
 )
 
+# Mark all tests in this module as integration tests
+pytestmark = pytest.mark.integration
+
+try:
+    import chromadb  # noqa: F401
+
+    chromadb_available = True
+except ImportError:
+    chromadb_available = False
+
 api_key = os.environ.get("TEST_API_KEY")
 tenant = os.environ.get("TEST_TENANT")
 database = os.environ.get("TEST_DATABASE")
 
 
 class MockEmbeddingModel(Resource):  # noqa: D101
+    def __init__(self, name: str) -> None:  # noqa: D107
+        self._name = name
+
     @classmethod
     def resource_type(cls) -> ResourceType:  # noqa: D102
         return ResourceType.EMBEDDING_MODEL
@@ -128,9 +134,9 @@ def get_resource(name: str, resource_type: ResourceType) -> Resource:
         name="chroma_vector_store",
         embedding_model="mock_embeddings",
         collection="test_collection",
-        api_key=api_key,
-        tenant=tenant,
-        database=database,
+        api_key=api_key or "fake-key",
+        tenant=tenant or "fake-tenant",
+        database=database or "fake-database",
         get_resource=get_resource
     )
 
diff --git a/python/pyproject.toml b/python/pyproject.toml
index e420e657..18c99a49 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -192,5 +192,11 @@ ban-relative-imports = "all"
 [tool.ruff.lint.flake8-type-checking]
 strict = true
 
+[tool.pytest.ini_options]
+markers = [
+    "integration: marks tests that require external services or integration (deselect with '-m \"not integration\"')",
+    "ollama: marks tests that require a local Ollama server (select with '-m ollama')",
+]
+
 [tool.ruff.format]
 docstring-code-format = true
diff --git a/tools/ut.sh b/tools/ut.sh
index acb0307c..07c5266a 100755
--- a/tools/ut.sh
+++ b/tools/ut.sh
@@ -122,6 +122,21 @@ python_tests() {
     set +e
     pushd "${ROOT}"/python
 
+    # Build pytest arguments array based on environment variables
+    local pytest_args=("pytest" "flink_agents")
+
+    # Marker filtering; the skip expression is grouped so "a or b" is negated as a whole
+    if [ -n "${PYTEST_SKIP_MARKERS:-}" ]; then
+        pytest_args+=("-m" "not (${PYTEST_SKIP_MARKERS})")
+    elif [ -n "${PYTEST_ONLY_MARKERS:-}" ]; then
+        pytest_args+=("-m" "${PYTEST_ONLY_MARKERS}")
+    fi
+
+    # Add verbose flag if requested
+    if [ "${PYTEST_VERBOSE:-false}" = "true" ]; then
+        pytest_args+=("-v")
+    fi
+
     # Install dependencies and run tests
     echo "Installing Python test dependencies..."
     if command -v uv >/dev/null 2>&1; then
@@ -131,8 +146,9 @@ python_tests() {
         uv sync --extra test
         if $verbose; then
             echo "Running tests with uv..."
+            echo "Command: uv run ${pytest_args[*]}"
         fi
-        uv run pytest flink_agents
+        uv run "${pytest_args[@]}"
         testcode=$?
     else
         if $verbose; then
@@ -152,8 +168,9 @@ python_tests() {
         fi
         if $verbose; then
             echo "Running tests with pytest..."
+            echo "Command: ${pytest_args[*]}"
         fi
-        pytest flink_agents
+        "${pytest_args[@]}"
         testcode=$?
     fi
 