diff --git a/Dockerfile.extproc b/Dockerfile.extproc
index 1ba8b45e..72ead6e4 100644
--- a/Dockerfile.extproc
+++ b/Dockerfile.extproc
@@ -55,4 +55,8 @@ ENV LD_LIBRARY_PATH=/app/lib
 
 EXPOSE 50051
 
-CMD ["/app/extproc-server", "--config", "/app/config/config.yaml"]
+# Copy entrypoint to allow switching config via env var CONFIG_FILE
+COPY scripts/entrypoint.sh /app/entrypoint.sh
+RUN chmod +x /app/entrypoint.sh
+
+ENTRYPOINT ["/app/entrypoint.sh"]
diff --git a/config/config.testing.yaml b/config/config.testing.yaml
new file mode 100644
index 00000000..0b84e0ff
--- /dev/null
+++ b/config/config.testing.yaml
@@ -0,0 +1,84 @@
+bert_model:
+  model_id: sentence-transformers/all-MiniLM-L12-v2
+  threshold: 0.6
+  use_cpu: true
+
+semantic_cache:
+  enabled: true
+  backend_type: "memory"
+  similarity_threshold: 0.8
+  max_entries: 1000
+  ttl_seconds: 3600
+  eviction_policy: "fifo"
+
+tools:
+  enabled: true
+  top_k: 3
+  similarity_threshold: 0.2
+  tools_db_path: "config/tools_db.json"
+  fallback_to_empty: true
+
+prompt_guard:
+  enabled: true
+  use_modernbert: true
+  model_id: "models/jailbreak_classifier_modernbert-base_model"
+  threshold: 0.7
+  use_cpu: true
+  jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"
+
+vllm_endpoints:
+  - name: "mock"
+    address: "mock-vllm"
+    port: 8000
+    models:
+      - "openai/gpt-oss-20b"
+    weight: 1
+    health_check_path: "/health"
+
+model_config:
+  "openai/gpt-oss-20b":
+    reasoning_family: "gpt-oss"
+    preferred_endpoints: ["mock"]
+    pii_policy:
+      allow_by_default: true
+
+categories:
+  - name: other
+    model_scores:
+      - model: openai/gpt-oss-20b
+        score: 0.7
+        use_reasoning: false
+
+default_model: openai/gpt-oss-20b
+
+reasoning_families:
+  deepseek:
+    type: "chat_template_kwargs"
+    parameter: "thinking"
+
+  qwen3:
+    type: "chat_template_kwargs"
+    parameter: "enable_thinking"
+
+  gpt-oss:
+    type: "reasoning_effort"
+    parameter: "reasoning_effort"
+  gpt:
+    type: "reasoning_effort"
+    parameter: "reasoning_effort"
+
+default_reasoning_effort: high
+
+api:
+  batch_classification:
+    max_batch_size: 100
+    concurrency_threshold: 5
+    max_concurrency: 8
+    metrics:
+      enabled: true
+      detailed_goroutine_tracking: true
+      high_resolution_timing: false
+      sample_rate: 1.0
+      duration_buckets:
+        [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
+      size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
diff --git a/docker-compose.yml b/docker-compose.yml
index 09f7b9ad..afc7e7e1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -13,6 +13,7 @@ services:
       - ./models:/app/models:ro
     environment:
       - LD_LIBRARY_PATH=/app/lib
+      - CONFIG_FILE=${CONFIG_FILE:-/app/config/config.yaml}
     networks:
       - semantic-network
    healthcheck:
@@ -44,6 +45,24 @@
       retries: 5
       start_period: 10s
 
+  # Mock vLLM service for testing profile
+  mock-vllm:
+    build:
+      context: ./tools/mock-vllm
+      dockerfile: Dockerfile
+    container_name: mock-vllm
+    profiles: ["testing"]
+    ports:
+      - "8000:8000"
+    networks:
+      - semantic-network
+    healthcheck:
+      test: ["CMD", "curl", "-fsS", "http://localhost:8000/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 5s
+
 networks:
   semantic-network:
     driver: bridge
diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
new file mode 100644
index 00000000..c0b4093a
--- /dev/null
+++ b/scripts/entrypoint.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+CONFIG_FILE_PATH=${CONFIG_FILE:-/app/config/config.yaml}
+
+if [[ ! -f "$CONFIG_FILE_PATH" ]]; then
+  echo "[entrypoint] Config file not found at $CONFIG_FILE_PATH" >&2
+  exit 1
+fi
+
+echo "[entrypoint] Starting semantic-router with config: $CONFIG_FILE_PATH"
+exec /app/extproc-server --config "$CONFIG_FILE_PATH"
diff --git a/tools/mock-vllm/Dockerfile b/tools/mock-vllm/Dockerfile
new file mode 100644
index 00000000..ea955b2b
--- /dev/null
+++ b/tools/mock-vllm/Dockerfile
@@ -0,0 +1,16 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY app.py ./
+
+EXPOSE 8000
+
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/tools/mock-vllm/README.md b/tools/mock-vllm/README.md
new file mode 100644
index 00000000..1ac7a9b8
--- /dev/null
+++ b/tools/mock-vllm/README.md
@@ -0,0 +1,9 @@
+# Mock vLLM (OpenAI-compatible) service
+
+A tiny FastAPI server that emulates the minimal endpoints used by the router:
+
+- GET /health
+- GET /v1/models
+- POST /v1/chat/completions
+
+Intended for local testing with Docker Compose profile `testing`.
diff --git a/tools/mock-vllm/app.py b/tools/mock-vllm/app.py
new file mode 100644
index 00000000..e4d02d15
--- /dev/null
+++ b/tools/mock-vllm/app.py
@@ -0,0 +1,81 @@
+import math
+import time
+from typing import List, Optional
+
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+app = FastAPI()
+
+
+class ChatMessage(BaseModel):
+    role: str
+    content: str
+
+
+class ChatRequest(BaseModel):
+    model: str
+    messages: List[ChatMessage]
+    temperature: Optional[float] = 0.2
+
+
+@app.get("/health")
+async def health():
+    return {"status": "ok"}
+
+
+@app.get("/v1/models")
+async def models():
+    return {"data": [{"id": "openai/gpt-oss-20b", "object": "model"}]}
+
+
+@app.post("/v1/chat/completions")
+async def chat_completions(req: ChatRequest):
+    # Very simple echo-like behavior
+    last_user = next(
+        (m.content for m in reversed(req.messages) if m.role == "user"), ""
+    )
+    content = f"[mock-{req.model}] You said: {last_user}"
+
+    # Rough token estimation: ~1 token per 4 characters (ceil)
+    def estimate_tokens(text: str) -> int:
+        if not text:
+            return 0
+        return max(1, math.ceil(len(text) / 4))
+
+    prompt_text = "\n".join(
+        m.content for m in req.messages if isinstance(m.content, str)
+    )
+    prompt_tokens = estimate_tokens(prompt_text)
+    completion_tokens = estimate_tokens(content)
+    total_tokens = prompt_tokens + completion_tokens
+
+    created_ts = int(time.time())
+
+    usage = {
+        "prompt_tokens": prompt_tokens,
+        "completion_tokens": completion_tokens,
+        "total_tokens": total_tokens,
+        # Optional details fields some clients read when using caching/reasoning
+        "prompt_tokens_details": {"cached_tokens": 0},
+        "completion_tokens_details": {"reasoning_tokens": 0},
+    }
+
+    return {
+        "id": "cmpl-mock-123",
+        "object": "chat.completion",
+        "created": created_ts,
+        "model": req.model,
+        "system_fingerprint": "mock-vllm",
+        "choices": [
+            {
+                "index": 0,
+                "message": {"role": "assistant", "content": content},
+                "finish_reason": "stop",
+                "logprobs": None,
+            }
+        ],
+        "usage": usage,
+        # Some SDKs look for token_usage; keep it as an alias for convenience.
+        "token_usage": usage,
+    }
diff --git a/tools/mock-vllm/requirements.txt b/tools/mock-vllm/requirements.txt
new file mode 100644
index 00000000..3971515d
--- /dev/null
+++ b/tools/mock-vllm/requirements.txt
@@ -0,0 +1,3 @@
+fastapi==0.115.0
+uvicorn==0.30.6
+pydantic==2.9.2
diff --git a/website/docs/getting-started/docker-quickstart.md b/website/docs/getting-started/docker-quickstart.md
index e06bed44..6a517ff2 100644
--- a/website/docs/getting-started/docker-quickstart.md
+++ b/website/docs/getting-started/docker-quickstart.md
@@ -4,42 +4,36 @@ Run Semantic Router + Envoy locally using Docker Compose v2.
 
 ## Prerequisites
 
-- Docker Engine and Docker Compose v2 (use the `docker compose` command, not the legacy `docker-compose`)
+- Docker Engine; see [Docker Engine Installation](https://docs.docker.com/engine/install/)
+- Docker Compose v2 (use the `docker compose` command, not the legacy `docker-compose`)
 
-  ```bash
-  # Verify
-  docker compose version
-  ```
+  If the Compose plugin is missing, see [Docker Compose Plugin Installation](https://docs.docker.com/compose/install/linux/#install-using-the-repository)
 
-  Install Docker Compose v2 for Ubuntu(if missing), see more in [Docker Compose Plugin Installation](https://docs.docker.com/compose/install/linux/#install-using-the-repository)
+  ```bash
+  # For Ubuntu and Debian, run:
+  sudo apt-get update
+  sudo apt-get install -y docker-compose-plugin
 
-  ```bash
-  # Remove legacy v1 if present (optional)
-  sudo apt-get remove -y docker-compose || true
+  # For RPM-based distributions, run:
+  sudo yum update
+  sudo yum install docker-compose-plugin
 
-  sudo apt-get update
-  sudo apt-get install -y ca-certificates curl gnupg
-  sudo install -m 0755 -d /etc/apt/keyrings
-  curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --yes --dearmor -o /etc/apt/keyrings/docker.gpg
-  echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(. /etc/os-release && echo $VERSION_CODENAME) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
-  sudo apt-get update
-  sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
-
-  docker compose version
-  ```
+  # Verify
+  docker compose version
+  ```
 
 - Ensure ports 8801, 50051, 19000 are free
 
 ## Install and Run with Docker Compose v2
 
-1) Clone the repo and move into it (from your workspace root):
+**1. Clone the repo and move into it (from your workspace root)**
 
 ```bash
 git clone https://github.com/vllm-project/semantic-router.git
 cd semantic-router
 ```
 
-2) Download required models (classification models):
+**2. Download required models (classification models)**
 
 ```bash
 make download-models
@@ -53,7 +47,7 @@ This downloads the classification models used by the router:
 
 Note: The BERT similarity model defaults to a remote Hugging Face model. See Troubleshooting for offline/local usage.
 
-3) Start the services with Docker Compose v2:
+**3. Start the services with Docker Compose v2**
 
 ```bash
 # Start core services (semantic-router + envoy)
 docker compose up --build
 
 # Or run in background (recommended)
 docker compose up --build -d
 
-# With testing profile (includes mock vLLM)
-docker compose --profile testing up --build
+# With testing profile (includes mock vLLM). Use the testing config to point the router at the mock endpoint:
+# (CONFIG_FILE is read by the router entrypoint; the file is mounted from ./config)
+CONFIG_FILE=/app/config/config.testing.yaml docker compose --profile testing up --build
 ```
 
-4) Verify
+**4. Verify**
 
 - Semantic Router (gRPC): localhost:50051
 - Envoy Proxy: http://localhost:8801
@@ -90,7 +85,7 @@ docker compose down
 
 ## Troubleshooting
 
-### 1) Router exits immediately with a Hugging Face DNS/download error
+**1. Router exits immediately with a Hugging Face DNS/download error**
 
 Symptoms (from `docker compose logs -f semantic-router`):
 
@@ -103,32 +98,62 @@ Why: `bert_model.model_id` in `config/config.yaml` points to a remote model (`se
 
 Fix options:
 
 - Allow network access in the container (online):
+
   - Ensure your host can resolve DNS, or add DNS servers to the `semantic-router` service in `docker-compose.yml`:
 
-  ```yaml
-  services:
-    semantic-router:
-      # ...
-      dns:
-        - 1.1.1.1
-        - 8.8.8.8
-  ```
+    ```yaml
+    services:
+      semantic-router:
+        # ...
+        dns:
+          - 1.1.1.1
+          - 8.8.8.8
+    ```
+
   - If behind a proxy, set `http_proxy/https_proxy/no_proxy` env vars for the service.
 
 - Use a local copy of the model (offline):
-  1. Download `sentence-transformers/all-MiniLM-L12-v2` to `./models/sentence-transformers/all-MiniLM-L12-v2/` on the host.
-  2. Update `config/config.yaml` to use the local path (mounted into the container at `/app/models`):
-  ```yaml
-  bert_model:
-    model_id: "models/sentence-transformers/all-MiniLM-L12-v2"
-    threshold: 0.6
-    use_cpu: true
-  ```
+  1. Download `sentence-transformers/all-MiniLM-L12-v2` to `./models/sentence-transformers/all-MiniLM-L12-v2/` on the host.
+  2. Update `config/config.yaml` to use the local path (mounted into the container at `/app/models`):
+
+     ```yaml
+     bert_model:
+       model_id: "models/sentence-transformers/all-MiniLM-L12-v2"
+       threshold: 0.6
+       use_cpu: true
+     ```
+
+  3. Recreate services: `docker compose up -d --build`
 
-  3. Recreate services: `docker compose up -d --build`
+Extra tip: If you use the testing profile, also pass the testing config so the router targets the mock service:
+
+```bash
+CONFIG_FILE=/app/config/config.testing.yaml docker compose --profile testing up --build
+```
+
+**2. Envoy/Router up but requests fail**
+
+- Ensure `mock-vllm` is healthy (testing profile only):
+  - `docker compose ps` should show mock-vllm healthy; logs show 200 on `/health`.
+- Verify the router config in use:
+  - Router logs print `Starting vLLM Semantic Router ExtProc with config: ...`. If it shows `/app/config/config.yaml` while testing, you forgot `CONFIG_FILE`.
+- Basic smoke test via Envoy (OpenAI-compatible):
+  - Send a POST to `http://localhost:8801/v1/chat/completions` with `{"model":"auto", "messages":[{"role":"user","content":"hi"}]}` and check that the mock responds with `[mock-openai/gpt-oss-20b]` content when the testing profile is active.
+
+**3. DNS problems inside containers**
+
+If DNS is flaky in your Docker environment, add DNS servers to the `semantic-router` service in `docker-compose.yml`:
+
+```yaml
+services:
+  semantic-router:
+    # ...
+    dns:
+      - 1.1.1.1
+      - 8.8.8.8
+```
 
-### 2) Port already in use
+For corporate proxies, set `http_proxy`, `https_proxy`, and `no_proxy` in the service `environment`.
 
 Make sure 8801, 50051, 19000 are not bound by other processes. Adjust ports in `docker-compose.yml` if needed.
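
The smoke test from the troubleshooting notes can be scripted as below. This is a minimal sketch, assuming the testing profile is running, Envoy is listening on localhost:8801, and the router resolves `"auto"` to `openai/gpt-oss-20b` as configured in `config/config.testing.yaml`:

```bash
# Minimal smoke test against the mock backend via Envoy (testing profile).
# The mock echoes the prompt, so the reply content should look like:
#   "[mock-openai/gpt-oss-20b] You said: hi"
curl -s http://localhost:8801/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "auto", "messages": [{"role": "user", "content": "hi"}]}'
```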