63 changes: 61 additions & 2 deletions .github/workflows/ci.yml
@@ -66,7 +66,7 @@ jobs:
run: tools/ut.sh -j

python_tests:
name: python tests on ${{ matrix.os }} ${{ matrix.python-version}}
name: python tests (non-integration) on ${{ matrix.os }} ${{ matrix.python-version}}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
@@ -88,8 +88,67 @@ jobs:
uses: astral-sh/setup-uv@v4
with:
version: "latest"
- name: Run Python Tests
- name: Run Python Tests (excluding Integration)
run: tools/ut.sh -p
env:
PYTEST_SKIP_MARKERS: "integration"

ollama_integration_tests:
Collaborator comment: Maybe this can be removed.

name: ollama integration tests (linux python 3.10)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install java
uses: actions/setup-java@v4
with:
java-version: '11'
distribution: 'adopt'
- name: Install python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
- name: Run Ollama Integration Tests
run: tools/ut.sh -p
env:
PYTEST_ONLY_MARKERS: "ollama"
PYTEST_VERBOSE: "true"
CI: true
- name: Upload Ollama logs on failure
if: failure()
uses: actions/upload-artifact@v4
with:
name: ollama-logs
path: /tmp/ollama*.log
retention-days: 7

integration_tests:
name: integration tests (linux python 3.10)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install java
uses: actions/setup-java@v4
with:
java-version: '11'
distribution: 'adopt'
- name: Install python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
- name: Run Integration Tests
run: tools/ut.sh -p
env:
PYTEST_ONLY_MARKERS: "integration"
PYTEST_VERBOSE: "true"
CI: true

e2e_tests:
name: e2e tests on ${{ matrix.os }} ${{ matrix.python-version}}
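
The three new Python jobs above differ only in the environment variables they pass to tools/ut.sh; as a rough local sketch (not part of the PR), each job is approximately equivalent to:

```bash
# python_tests: everything except integration-marked tests
PYTEST_SKIP_MARKERS="integration" tools/ut.sh -p

# ollama_integration_tests: only ollama-marked tests; CI=true also triggers the
# automatic Ollama server setup inside the test modules (Linux only).
PYTEST_ONLY_MARKERS="ollama" PYTEST_VERBOSE="true" CI=true tools/ut.sh -p

# integration_tests: only integration-marked tests (most skip themselves unless
# TEST_API_KEY and related variables are set).
PYTEST_ONLY_MARKERS="integration" PYTEST_VERBOSE="true" CI=true tools/ut.sh -p
```
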
@@ -27,7 +27,10 @@
)
from flink_agents.plan.tools.function_tool import from_callable

test_model = os.environ.get("TEST_MODEL")
# Mark all tests in this module as integration tests
pytestmark = pytest.mark.integration

test_model = os.environ.get("TEST_MODEL", "claude-3-haiku-20240307")
api_key = os.environ.get("TEST_API_KEY")


@@ -27,7 +27,10 @@
)
from flink_agents.plan.tools.function_tool import from_callable

test_model = os.environ.get("TEST_MODEL")
# Mark all tests in this module as integration tests
pytestmark = pytest.mark.integration

test_model = os.environ.get("TEST_MODEL", "gpt-3.5-turbo")
api_key = os.environ.get("TEST_API_KEY")
api_base_url = os.environ.get("TEST_API_BASE_URL")

@@ -32,12 +32,15 @@
)
from flink_agents.plan.tools.function_tool import FunctionTool, from_callable

# Mark all tests in this module as ollama tests
pytestmark = pytest.mark.ollama
Collaborator comment: This should be pytest.mark.ollama.


test_model = os.environ.get("OLLAMA_CHAT_MODEL", "qwen3:0.6b")
current_dir = Path(__file__).parent

try:
# only auto setup ollama in ci with python 3.10 to reduce ci cost.
if "3.10" in sys.version:
# Auto setup ollama in CI environment (when CI env var is set)
if os.environ.get("CI") and sys.platform == "linux":
subprocess.run(
["bash", f"{current_dir}/start_ollama_server.sh"], timeout=300, check=True
)
@@ -60,7 +63,9 @@
client is None, reason="Ollama client is not available or test model is missing"
)
def test_ollama_chat() -> None: # noqa :D103
server = OllamaChatModelConnection(name="ollama", request_timeout=120.0)
# Use longer timeout in CI environment (slower resources)
request_timeout = 120.0 if os.environ.get("CI") else 30.0
server = OllamaChatModelConnection(name="ollama", request_timeout=request_timeout)
response = server.chat(
[ChatMessage(role=MessageRole.USER, content="Hello!")], model=test_model
)
@@ -94,7 +99,9 @@ def get_tool(name: str, type: ResourceType) -> FunctionTool: # noqa :D103
client is None, reason="Ollama client is not available or test model is missing"
)
def test_ollama_chat_with_tools() -> None: # noqa :D103
connection = OllamaChatModelConnection(name="ollama", request_timeout=120.0)
# Use longer timeout for tool calling in CI environment (slower resources)
request_timeout = 120.0 if os.environ.get("CI") else 30.0
connection = OllamaChatModelConnection(name="ollama", request_timeout=request_timeout)

def get_resource(name: str, type: ResourceType) -> Resource:
if type == ResourceType.TOOL:
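
For quick local iteration on just these Ollama-marked tests, here is a minimal sketch, assuming an Ollama server is already running and the model has been pulled; without CI set, the auto-setup script above is not invoked and the shorter 30.0 timeout applies:

```bash
# Run only the ollama-marked tests from the python/ directory.
# OLLAMA_CHAT_MODEL is optional; the module above defaults to "qwen3:0.6b".
cd python
uv run pytest flink_agents -m ollama -v
```
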
@@ -29,12 +29,15 @@
OllamaEmbeddingModelSetup,
)

# Mark all tests in this module as ollama tests
pytestmark = pytest.mark.ollama

test_model = os.environ.get("OLLAMA_EMBEDDING_MODEL", "all-minilm:22m")
current_dir = Path(__file__).parent

try:
# only auto setup ollama in ci with python 3.10 to reduce ci cost.
if "3.10" in sys.version:
# Auto setup ollama in CI environment (when CI env var is set)
if os.environ.get("CI") and sys.platform == "linux":
subprocess.run(
["bash", f"{current_dir}/start_ollama_server.sh"], timeout=300, check=True
)
@@ -58,9 +61,12 @@
)
def test_ollama_embedding_setup() -> None:
"""Test embedding functionality with OllamaEmbeddingModelSetup."""
# Use longer timeout for embedding in CI environment (slower resources)
request_timeout = 120.0 if os.environ.get("CI") else 30.0
Collaborator comment: This request timeout could also be hit in a local environment, so it could be unified to 120.0.

connection = OllamaEmbeddingModelConnection(
name="ollama_embed",
base_url="http://localhost:11434"
base_url="http://localhost:11434",
request_timeout=request_timeout
)

def get_resource(name: str, type: ResourceType) -> Resource:
@@ -25,14 +25,18 @@
OpenAIEmbeddingModelSetup,
)

# Mark all tests in this module as integration tests
pytestmark = pytest.mark.integration

test_model = os.environ.get("TEST_EMBEDDING_MODEL", "text-embedding-3-small")
api_key = os.environ.get("TEST_API_KEY")


@pytest.mark.skipif(api_key is None, reason="TEST_API_KEY is not set")
@pytest.mark.integration
Collaborator comment: All tests in this module are already marked above, so this decorator may not be needed.

def test_openai_embedding_model() -> None: # noqa: D103
connection = OpenAIEmbeddingModelConnection(
name="openai", api_key=api_key
name="openai", api_key=api_key or "fake-key"
Collaborator comment: If this test is not skipped, api_key must not be None, so the or "fake-key" fallback may be unnecessary.

)

def get_resource(name: str, type: ResourceType) -> Resource:
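
Because these OpenAI-backed tests skip themselves when TEST_API_KEY is unset, a hypothetical local run would export the variables the test modules read and then select the integration marker; the key value below is a placeholder, and the model variables fall back to the defaults shown in the diffs above:

```bash
# Placeholder credentials; variable names come from the test modules above.
export TEST_API_KEY="<your-api-key>"
export TEST_EMBEDDING_MODEL="text-embedding-3-small"   # optional, matches the default
cd python
uv run pytest flink_agents -m integration -v
```
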
@@ -20,13 +20,6 @@

import pytest

try:
import chromadb # noqa: F401

chromadb_available = True
except ImportError:
chromadb_available = False

from flink_agents.api.resource import Resource, ResourceType
from flink_agents.api.vector_stores.vector_store import (
VectorStoreQuery,
@@ -35,12 +28,25 @@
ChromaVectorStore,
)

# Mark all tests in this module as integration tests
pytestmark = pytest.mark.integration

try:
import chromadb # noqa: F401

chromadb_available = True
except ImportError:
chromadb_available = False

api_key = os.environ.get("TEST_API_KEY")
tenant = os.environ.get("TEST_TENANT")
database = os.environ.get("TEST_DATABASE")


class MockEmbeddingModel(Resource): # noqa: D101
def __init__(self, name: str) -> None: # noqa: D107
self._name = name

@classmethod
def resource_type(cls) -> ResourceType: # noqa: D102
return ResourceType.EMBEDDING_MODEL
@@ -113,6 +119,7 @@ def get_resource(name: str, resource_type: ResourceType) -> Resource:


@pytest.mark.skipif(api_key is None, reason="TEST_API_KEY is not set")
@pytest.mark.integration
def test_cloud_chroma_vector_store() -> None:
"""Test cloud ChromaDB vector store with embedding model integration."""
embedding_model = MockEmbeddingModel(name="mock_embeddings")
@@ -128,9 +135,9 @@ def get_resource(name: str, resource_type: ResourceType) -> Resource:
name="chroma_vector_store",
embedding_model="mock_embeddings",
collection="test_collection",
api_key=api_key,
tenant=tenant,
database=database,
api_key=api_key or "fake-key",
Collaborator comment: Ditto (same point as the api_key fallback above).

tenant=tenant or "fake-tenant",
database=database or "fake-database",
get_resource=get_resource
)

5 changes: 5 additions & 0 deletions python/pyproject.toml
@@ -192,5 +192,10 @@ ban-relative-imports = "all"
[tool.ruff.lint.flake8-type-checking]
strict = true

[tool.pytest.ini_options]
markers = [
"integration: marks tests that require external services or integration (deselect with '-m \"not integration\"')",
]

[tool.ruff.format]
docstring-code-format = true
21 changes: 19 additions & 2 deletions tools/ut.sh
@@ -122,6 +122,21 @@ python_tests() {
set +e
pushd "${ROOT}"/python

# Build pytest arguments array based on environment variables
local pytest_args=("pytest" "flink_agents")

# Support marker-based filtering for cleaner argument handling
if [ -n "${PYTEST_SKIP_MARKERS:-}" ]; then
pytest_args+=("-m" "not ${PYTEST_SKIP_MARKERS}")
elif [ -n "${PYTEST_ONLY_MARKERS:-}" ]; then
pytest_args+=("-m" "${PYTEST_ONLY_MARKERS}")
fi

# Add verbose flag if requested
if [ "${PYTEST_VERBOSE:-false}" = "true" ]; then
pytest_args+=("-v")
fi

# Install dependencies and run tests
echo "Installing Python test dependencies..."
if command -v uv >/dev/null 2>&1; then
Expand All @@ -131,8 +146,9 @@ python_tests() {
uv sync --extra test
if $verbose; then
echo "Running tests with uv..."
echo "Command: uv run ${pytest_args[*]}"
fi
uv run pytest flink_agents
uv run "${pytest_args[@]}"
testcode=$?
else
if $verbose; then
Expand All @@ -152,8 +168,9 @@ python_tests() {
fi
if $verbose; then
echo "Running tests with pytest..."
echo "Command: ${pytest_args[*]}"
fi
pytest flink_agents
"${pytest_args[@]}"
testcode=$?
fi

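
Putting the new argument handling together, the environment variables map onto the final pytest invocation roughly as follows; this is a behavioural sketch of the script above, not additional code in the PR:

```bash
# PYTEST_SKIP_MARKERS takes precedence over PYTEST_ONLY_MARKERS (if/elif above).
PYTEST_SKIP_MARKERS="integration" tools/ut.sh -p
#   -> uv run pytest flink_agents -m "not integration"

PYTEST_ONLY_MARKERS="ollama" PYTEST_VERBOSE="true" tools/ut.sh -p
#   -> uv run pytest flink_agents -m ollama -v

tools/ut.sh -p
#   -> uv run pytest flink_agents   (default behaviour, unchanged)
```
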