From 3cc06b7d12dd19c79b607ac5ab45a2a072303812 Mon Sep 17 00:00:00 2001 From: charant30 Date: Fri, 6 Feb 2026 15:43:39 -0500 Subject: [PATCH] feat: OpenAPI Test Generator - Full-stack application A complete solution for generating and running API tests from OpenAPI specifications. Features: - Upload OpenAPI specs (YAML/JSON) via Web UI or REST API - Generate pytest tests using Mock LLM (deterministic) or Real LLM (OpenAI/Anthropic) - Run tests against any API endpoint - View detailed results with pass/fail, failures, and tracebacks - Export JUnit XML for CI integration Tech Stack: - FastAPI backend with REST API - Streamlit web interface - SQLite database for persistence - pytest for test execution - httpx for HTTP client Test Coverage: - 67 tests (unit, integration, e2e) - All tests pass on Windows and Mac --- .github/workflows/ci.yml | 52 ++ .gitignore | 65 ++ Dockerfile | 24 + Dockerfile.example | 14 + Dockerfile.streamlit | 25 + Makefile | 63 ++ README.md | 191 +++-- SPECS/api-endpoints.md | 27 + SPECS/example-api.md | 23 + SPECS/openapi-parser.md | 25 + SPECS/streamlit-ui.md | 25 + SPECS/test-generator.md | 23 + SPECS/test-runner.md | 22 + app/__init__.py | 1 + app/config.py | 63 ++ app/generator/__init__.py | 32 + app/generator/llm_client.py | 638 +++++++++++++++++ app/generator/test_generator.py | 245 +++++++ app/main.py | 33 + app/openapi_parser.py | 365 ++++++++++ app/routes.py | 451 ++++++++++++ app/runner/__init__.py | 8 + app/runner/junit_parser.py | 199 ++++++ app/runner/pytest_runner.py | 206 ++++++ app/storage/__init__.py | 4 + app/storage/db.py | 429 +++++++++++ docker-compose.yml | 48 ++ docs/README.md | 106 +++ docs/SETUP.md | 192 +++++ docs/WALKTHROUGH.md | 667 ++++++++++++++++++ docs/codegen-log.md | 116 +++ example_api/__init__.py | 1 + example_api/main.py | 100 +++ openapi_specs/example_openapi.yaml | 206 ++++++ requirements.txt | 21 + run.py | 271 +++++++ streamlit_app/app.py | 58 ++ .../pages/1_\360\237\223\204_Upload_Spec.py" | 152 ++++ ...342\232\231\357\270\217_Generate_Tests.py" | 232 ++++++ .../3_\342\226\266\357\270\217_Run_Tests.py" | 207 ++++++ tests/__init__.py | 1 + tests/conftest.py | 17 + tests/e2e/__init__.py | 1 + tests/e2e/test_full_workflow.py | 301 ++++++++ tests/integration/__init__.py | 1 + tests/integration/test_api_endpoints.py | 260 +++++++ tests/unit/__init__.py | 1 + tests/unit/test_generator.py | 285 ++++++++ tests/unit/test_junit_parser.py | 244 +++++++ tests/unit/test_openapi_parser.py | 345 +++++++++ 50 files changed, 7043 insertions(+), 43 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 Dockerfile create mode 100644 Dockerfile.example create mode 100644 Dockerfile.streamlit create mode 100644 Makefile create mode 100644 SPECS/api-endpoints.md create mode 100644 SPECS/example-api.md create mode 100644 SPECS/openapi-parser.md create mode 100644 SPECS/streamlit-ui.md create mode 100644 SPECS/test-generator.md create mode 100644 SPECS/test-runner.md create mode 100644 app/__init__.py create mode 100644 app/config.py create mode 100644 app/generator/__init__.py create mode 100644 app/generator/llm_client.py create mode 100644 app/generator/test_generator.py create mode 100644 app/main.py create mode 100644 app/openapi_parser.py create mode 100644 app/routes.py create mode 100644 app/runner/__init__.py create mode 100644 app/runner/junit_parser.py create mode 100644 app/runner/pytest_runner.py create mode 100644 app/storage/__init__.py create mode 100644 app/storage/db.py create mode 100644 docker-compose.yml create 
mode 100644 docs/README.md create mode 100644 docs/SETUP.md create mode 100644 docs/WALKTHROUGH.md create mode 100644 docs/codegen-log.md create mode 100644 example_api/__init__.py create mode 100644 example_api/main.py create mode 100644 openapi_specs/example_openapi.yaml create mode 100644 requirements.txt create mode 100644 run.py create mode 100644 streamlit_app/app.py create mode 100644 "streamlit_app/pages/1_\360\237\223\204_Upload_Spec.py" create mode 100644 "streamlit_app/pages/2_\342\232\231\357\270\217_Generate_Tests.py" create mode 100644 "streamlit_app/pages/3_\342\226\266\357\270\217_Run_Tests.py" create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/e2e/__init__.py create mode 100644 tests/e2e/test_full_workflow.py create mode 100644 tests/integration/__init__.py create mode 100644 tests/integration/test_api_endpoints.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/test_generator.py create mode 100644 tests/unit/test_junit_parser.py create mode 100644 tests/unit/test_openapi_parser.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..621bc7c1 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,52 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run linting + run: | + pip install ruff + ruff check . + continue-on-error: true + + - name: Run unit tests + run: | + mkdir -p test-results + pytest tests/unit -v --junitxml=test-results/junit-unit.xml + + - name: Run integration tests + run: | + pytest tests/integration -v --junitxml=test-results/junit-integration.xml + + - name: Upload test results + uses: actions/upload-artifact@v4 + if: always() + with: + name: test-results + path: test-results/ + + - name: Publish Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: test-results/*.xml diff --git a/.gitignore b/.gitignore index e69de29b..b1f3241f 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,65 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +ENV/ +env/ +.venv/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ +.nox/ +junit.xml + +# Generated files +generated_tests/ +data/ +test-results/ +*.db + +# Logs +*.log + +# OS +.DS_Store +Thumbs.db + +# Environment +.env +.env.local + +# Ruff cache +.ruff_cache/ + +# Streamlit +.streamlit/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..2cf2cd54 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,24 @@ +# Dockerfile for FastAPI backend +FROM python:3.11-slim + +WORKDIR /app + +# Install dependencies +COPY requirements.txt . 
+RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY app/ app/ +COPY openapi_specs/ openapi_specs/ + +# Create data directories +RUN mkdir -p /app/data /app/generated_tests + +# Environment variables +ENV DATABASE_PATH=/app/data/app.db +ENV GENERATED_TESTS_DIR=/app/generated_tests +ENV DEFAULT_TARGET_URL=http://example-api:8001 + +EXPOSE 8000 + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/Dockerfile.example b/Dockerfile.example new file mode 100644 index 00000000..e6fc5a4f --- /dev/null +++ b/Dockerfile.example @@ -0,0 +1,14 @@ +# Dockerfile for example target API +FROM python:3.11-slim + +WORKDIR /app + +# Install dependencies +RUN pip install --no-cache-dir fastapi uvicorn pydantic + +# Copy example API code +COPY example_api/ example_api/ + +EXPOSE 8001 + +CMD ["uvicorn", "example_api.main:app", "--host", "0.0.0.0", "--port", "8001"] diff --git a/Dockerfile.streamlit b/Dockerfile.streamlit new file mode 100644 index 00000000..b57fdc04 --- /dev/null +++ b/Dockerfile.streamlit @@ -0,0 +1,25 @@ +# Dockerfile for Streamlit UI +FROM python:3.11-slim + +WORKDIR /app + +# Install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY app/ app/ +COPY streamlit_app/ streamlit_app/ +COPY openapi_specs/ openapi_specs/ + +# Create data directories +RUN mkdir -p /app/data /app/generated_tests + +# Environment variables +ENV DATABASE_PATH=/app/data/app.db +ENV GENERATED_TESTS_DIR=/app/generated_tests +ENV DEFAULT_TARGET_URL=http://example-api:8001 + +EXPOSE 8501 + +CMD ["streamlit", "run", "streamlit_app/app.py", "--server.port", "8501", "--server.address", "0.0.0.0"] diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..afcfec81 --- /dev/null +++ b/Makefile @@ -0,0 +1,63 @@ +.PHONY: dev ui example test e2e lint install clean + +# Install dependencies +install: + pip install -r requirements.txt + +# Run the FastAPI backend +dev: + uvicorn app.main:app --reload --port 8000 + +# Run the Streamlit UI +ui: + streamlit run streamlit_app/app.py --server.port 8501 + +# Run the example target API +example: + uvicorn example_api.main:app --reload --port 8001 + +# Run all tests +test: + pytest tests/ -v --junitxml=test-results/junit.xml + +# Run only unit tests +test-unit: + pytest tests/unit/ -v + +# Run only integration tests +test-integration: + pytest tests/integration/ -v + +# Run E2E tests (requires example API to be running or will start it) +e2e: + pytest tests/e2e/ -v + +# Run linting +lint: + ruff check . + ruff format --check . + +# Format code +format: + ruff format . + +# Clean generated files +clean: + rm -rf generated_tests/ + rm -rf data/ + rm -rf test-results/ + rm -rf .pytest_cache/ + rm -rf __pycache__/ + find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true + +# Build Docker images +docker-build: + docker-compose build + +# Run with Docker +docker-up: + docker-compose up + +# Stop Docker containers +docker-down: + docker-compose down diff --git a/README.md b/README.md index 494f1c75..9f93a2ef 100644 --- a/README.md +++ b/README.md @@ -1,43 +1,148 @@ -# Candidate Assessment: Spec-Driven Development With Codegen Tools - -This assessment evaluates how you use modern code generation tools (for example `5.2-Codex`, `Claude`, `Copilot`, and similar) to design, build, and test a software application using a spec-driven development pattern. You may build a frontend, a backend, or both. 
- -## Goals -- Build a working application with at least one meaningful feature. -- Create a testing framework to validate the application. -- Demonstrate effective use of code generation tools to accelerate delivery. -- Show clear, maintainable engineering practices. - -## Deliverables -- Application source code in this repository. -- A test suite and test harness that can be run locally. -- Documentation that explains how to run the app and the tests. - -## Scope Options -Pick one: -- Frontend-only application. -- Backend-only application. -- Full-stack application. - -Your solution should include at least one real workflow, for example: -- Create and view a resource. -- Search or filter data. -- Persist data in memory or storage. - -## Rules -- You must use a code generation tool (for example `5.2-Codex`, `Claude`, or similar). You can use multiple tools. -- You must build the application and a testing framework for it. -- The application and tests must run locally. -- Do not include secrets or credentials in this repository. - -## Evaluation Criteria -- Working product: Does the app do what it claims? -- Test coverage: Do tests cover key workflows and edge cases? -- Engineering quality: Clarity, structure, and maintainability. -- Use of codegen: How effectively you used tools to accelerate work. -- Documentation: Clear setup and run instructions. - -## What to Submit -- When you are complete, put up a Pull Request against this repository with your changes. -- A short summary of your approach and tools used in your PR submission -- Any additional information or approach that helped you. +# OpenAPI Test Generator + +Generate and run API tests automatically from OpenAPI specifications. + +![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg) +![Platform](https://img.shields.io/badge/platform-Windows%20%7C%20Mac%20%7C%20Linux-lightgrey.svg) + +--- + +## What This Does + +1. **Upload** an OpenAPI spec (YAML or JSON) +2. **Generate** pytest test files automatically +3. **Run** tests against any API +4. **View** results with pass/fail details + +--- + +## Quick Start + +### 1. Install Dependencies +```bash +python run.py setup +``` + +### 2. Start the Application +```bash +python run.py all +``` + +### 3. Open Your Browser +Go to: **http://localhost:8501** + +--- + +## Usage + +| Command | What it does | +|---------|--------------| +| `python run.py setup` | Install dependencies | +| `python run.py all` | Start everything | +| `python run.py test` | Run tests | +| `python run.py help` | Show all commands | + +--- + +## Features + +- **Web UI**: Easy-to-use Streamlit interface +- **Multiple LLM Providers**: Mock (default), OpenAI, or Anthropic +- **Cross-platform**: Works on Windows, Mac, and Linux +- **Test Runner**: Execute tests and view detailed results + +--- + +## LLM Providers + +Choose your test generation engine: + +| Provider | Models | API Key | +|----------|--------|---------| +| **Mock** (default) | Deterministic | Not needed | +| **OpenAI** | GPT-4o, GPT-4, GPT-3.5 | Required | +| **Anthropic** | Claude 3 Haiku/Sonnet/Opus | Required | + +### Using OpenAI or Anthropic + +Set your API key as an environment variable: +```bash +export OPENAI_API_KEY="your-key" # For OpenAI +export ANTHROPIC_API_KEY="your-key" # For Anthropic +``` + +Or enter it directly in the UI when generating tests. 
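+
+---
+
+## Scripting the Workflow
+
+If you prefer to drive the backend directly instead of going through the UI,
+the REST endpoints can be scripted. A minimal sketch, assuming the backend is
+running on localhost:8000 and the example API on localhost:8001 (field names
+follow the request models in app/routes.py):
+
+```python
+from pathlib import Path
+
+import httpx
+
+with httpx.Client(base_url="http://localhost:8000", timeout=60.0) as api:
+    # 1. Upload an OpenAPI spec
+    content = Path("openapi_specs/example_openapi.yaml").read_text()
+    spec = api.post("/specs", json={"name": "example", "content": content}).json()
+
+    # 2. Generate pytest files for it
+    gen = api.post("/generate", json={"spec_id": spec["id"]}).json()
+
+    # 3. Run the generated tests against the example API
+    run = api.post(
+        "/runs",
+        json={"generation_id": gen["id"], "target_url": "http://localhost:8001"},
+    ).json()
+
+    print(f"passed={run['passed']} failed={run['failed']}")
+```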
+
+---
+
+## Services
+
+| Service | URL |
+|---------|-----|
+| Web UI | http://localhost:8501 |
+| Backend API | http://localhost:8000 |
+| Example API | http://localhost:8001 |
+
+---
+
+## Requirements
+
+- Python 3.10 or higher
+- pip (Python package manager)
+
+---
+
+## Documentation
+
+- [Complete Walkthrough](docs/WALKTHROUGH.md) - Step-by-step guide for UI and CLI usage
+- [Full Setup Guide](docs/SETUP.md) - Detailed instructions and troubleshooting
+- [API Docs](http://localhost:8000/docs) - Interactive API documentation (when running)
+
+---
+
+## Project Structure
+
+```
+spec-driven-development/
+├── run.py             # Runner script (start here!)
+├── app/               # FastAPI backend
+├── streamlit_app/     # Web UI (Streamlit)
+├── example_api/       # Sample API for testing
+├── tests/             # Test suite (67 tests)
+├── openapi_specs/     # Example OpenAPI specs
+├── SPECS/             # Feature specifications
+└── docs/              # Documentation
+```
+
+---
+
+## Running Tests
+
+```bash
+python run.py test
+```
+
+All 67 tests should pass on both Windows and Mac.
+
+---
+
+## Built With
+
+- **FastAPI** - Backend API framework
+- **Streamlit** - Web UI framework
+- **pytest** - Testing framework
+- **httpx** - HTTP client
+- **SQLite** - Local database
+
+---
+
+## Development
+
+See [docs/SETUP.md](docs/SETUP.md) for developer setup instructions.
+
+### Quick Dev Commands
+```bash
+python run.py api      # Backend only
+python run.py ui       # UI only
+python run.py example  # Example API only
+```
diff --git a/SPECS/api-endpoints.md b/SPECS/api-endpoints.md
new file mode 100644
index 00000000..2b72ebad
--- /dev/null
+++ b/SPECS/api-endpoints.md
@@ -0,0 +1,27 @@
+# Feature Spec: REST API Endpoints
+
+## Goal
+Provide FastAPI endpoints to upload specs, generate tests, and run tests.
+
+## Scope
+- In: CRUD for specs, sync generation, sync test runs
+- Out: Async background jobs, webhooks
+
+## Requirements
+- POST /specs to upload OpenAPI spec
+- GET /specs to list all specs
+- GET /specs/{id} to get spec details
+- POST /generate to generate tests for a spec
+- GET /generations/{id} to get generation details
+- POST /runs to execute tests
+- GET /runs/{id} to get results
+
+## Acceptance Criteria
+- [ ] POST /specs validates and stores spec, returns spec_id
+- [ ] GET /specs returns list of all specs
+- [ ] GET /specs/{id} returns spec details with endpoints
+- [ ] POST /generate creates pytest files, returns generation_id
+- [ ] GET /generations/{id} returns generation status and files
+- [ ] POST /runs executes tests, returns run_id and summary
+- [ ] GET /runs/{id} returns status, passed, failed, failures list
+- [ ] GET /health returns {"status": "ok"}
diff --git a/SPECS/example-api.md b/SPECS/example-api.md
new file mode 100644
index 00000000..074c4e3c
--- /dev/null
+++ b/SPECS/example-api.md
@@ -0,0 +1,23 @@
+# Feature Spec: Example Target API
+
+## Goal
+Provide a simple FastAPI application to run generated tests against.
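+
+For illustration, a minimal sketch of the shape this spec describes (the real
+implementation lives in example_api/main.py; in-memory storage and Pydantic v2
+are assumed here):
+
+```python
+from uuid import uuid4
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+
+app = FastAPI()
+items: dict[str, dict] = {}  # in-memory store
+
+
+class ItemCreate(BaseModel):
+    name: str
+    price: float
+
+
+@app.get("/health")
+def health():
+    return {"status": "ok"}
+
+
+@app.post("/items", status_code=201)
+def create_item(item: ItemCreate):
+    stored = {"id": str(uuid4()), **item.model_dump()}
+    items[stored["id"]] = stored
+    return stored
+
+
+@app.get("/items/{item_id}")
+def get_item(item_id: str):
+    if item_id not in items:
+        raise HTTPException(status_code=404, detail="Item not found")
+    return items[item_id]
+```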
+
+## Scope
+- In: Basic CRUD for items resource
+- Out: Authentication, complex relationships
+
+## Requirements
+- GET /health returns status
+- POST /items creates item with name and price
+- GET /items/{id} returns item or 404
+- GET /items returns list of all items
+
+## Acceptance Criteria
+- [ ] Health endpoint returns {"status": "ok"}
+- [ ] POST /items with valid body returns 201 + created item with id
+- [ ] POST /items with missing required fields returns 422
+- [ ] GET /items returns list of all items
+- [ ] GET /items/{id} with valid id returns 200 + item
+- [ ] GET /items/{id} with invalid id returns 404
+- [ ] OpenAPI spec matches implementation
diff --git a/SPECS/openapi-parser.md b/SPECS/openapi-parser.md
new file mode 100644
index 00000000..b13f736c
--- /dev/null
+++ b/SPECS/openapi-parser.md
@@ -0,0 +1,25 @@
+# Feature Spec: OpenAPI Parser
+
+## Goal
+Parse and normalize OpenAPI 3.x specs into an internal model.
+
+## Scope
+- In: YAML/JSON parsing, endpoint extraction, schema extraction
+- Out: OpenAPI 2.x (Swagger), code generation from schemas
+
+## Requirements
+- Parse YAML and JSON format specs
+- Extract all endpoints with path, method, parameters
+- Extract request body schemas
+- Extract response codes and schemas
+- Handle $ref references in schemas
+
+## Acceptance Criteria
+- [ ] parse_spec() handles valid YAML
+- [ ] parse_spec() handles valid JSON
+- [ ] parse_spec() raises error on invalid content
+- [ ] normalize_spec() returns NormalizedSpec with all endpoints
+- [ ] Parameters (path, query) extracted correctly
+- [ ] Request body schema extracted when present
+- [ ] Response schemas extracted for each status code
+- [ ] $ref references resolved correctly
diff --git a/SPECS/streamlit-ui.md b/SPECS/streamlit-ui.md
new file mode 100644
index 00000000..cac8c9e2
--- /dev/null
+++ b/SPECS/streamlit-ui.md
@@ -0,0 +1,25 @@
+# Feature Spec: Streamlit UI
+
+## Goal
+Provide a web interface for the test generator workflow.
+
+## Scope
+- In: Upload spec, generate tests, run tests, view results
+- Out: User auth, team features, history browsing
+
+## Requirements
+- Page 1: Upload/paste OpenAPI spec, preview endpoints
+- Page 2: Select spec, generate tests, view generated code
+- Page 3: Run tests, display pass/fail summary, show failures
+
+## Acceptance Criteria
+- [ ] Home page with navigation to all features
+- [ ] Upload page accepts YAML/JSON text or file upload
+- [ ] Upload page shows parsed endpoints in table
+- [ ] Upload page has "Load Example" button
+- [ ] Generate page lists saved specs in dropdown
+- [ ] Generate page shows generated test files with syntax highlighting
+- [ ] Run page has input for target base URL
+- [ ] Run page shows pass/fail/skipped counts with colors
+- [ ] Run page shows expandable failure details
+- [ ] Download buttons for generated tests and JUnit XML
diff --git a/SPECS/test-generator.md b/SPECS/test-generator.md
new file mode 100644
index 00000000..34087018
--- /dev/null
+++ b/SPECS/test-generator.md
@@ -0,0 +1,23 @@
+# Feature Spec: Test Generator
+
+## Goal
+Generate pytest test files from normalized OpenAPI specs.
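+
+For illustration, the mock client's not-found case for GET /items/{id} renders
+roughly as follows (test names derive from the spec's operationId):
+
+```python
+def test_get_item_not_found(client):
+    """
+    Test GET /items/{id} with non-existent resource
+    """
+    url = "/items/nonexistent-id-12345"
+    response = client.get(url)
+
+    assert response.status_code == 404
+```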
+ +## Scope +- In: Mock LLM (deterministic), Real LLM (optional), pytest output +- Out: Other test frameworks, complex test scenarios + +## Requirements +- LLMClient interface with generate_tests() method +- MockLLMClient returns deterministic output (no API calls) +- RealLLMClient wraps OpenAI/Anthropic (optional, via env var) +- Generate tests: happy path, negative cases, schema validation + +## Acceptance Criteria +- [ ] MockLLMClient generates same output for same input +- [ ] Generated Python code is syntactically valid (ast.parse) +- [ ] Happy path test for each endpoint +- [ ] 404 test for GET endpoints with path params +- [ ] 400/422 test for POST endpoints with required fields +- [ ] Tests use httpx client +- [ ] conftest.py generated with fixtures diff --git a/SPECS/test-runner.md b/SPECS/test-runner.md new file mode 100644 index 00000000..a6af75da --- /dev/null +++ b/SPECS/test-runner.md @@ -0,0 +1,22 @@ +# Feature Spec: Test Runner + +## Goal +Execute generated pytest tests and parse results. + +## Scope +- In: pytest execution, JUnit XML parsing, result summary +- Out: Parallel execution, distributed testing + +## Requirements +- Run pytest programmatically with JUnit XML output +- Capture stdout/stderr +- Parse JUnit XML for pass/fail counts +- Extract failure details (test name, message, traceback) + +## Acceptance Criteria +- [ ] run_tests() executes pytest on given directory +- [ ] Returns RunResult with exit_code, passed, failed, skipped +- [ ] JUnit XML file is generated +- [ ] parse_junit_xml() extracts test counts +- [ ] Failure details include test name and traceback +- [ ] Duration captured in seconds diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 00000000..5f2c338f --- /dev/null +++ b/app/__init__.py @@ -0,0 +1 @@ +# OpenAPI Test Generator App diff --git a/app/config.py b/app/config.py new file mode 100644 index 00000000..0b13c32e --- /dev/null +++ b/app/config.py @@ -0,0 +1,63 @@ +""" +Configuration - Environment-based configuration for the application. 
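+
+Key environment variables: DATABASE_PATH, GENERATED_TESTS_DIR,
+DEFAULT_TARGET_URL, LLM_PROVIDER, OPENAI_API_KEY / ANTHROPIC_API_KEY, and
+TEST_TIMEOUT; each falls back to a sensible local default.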
+""" + +import os +from pathlib import Path + +# Get the project root directory (where spec-driven-development folder is) +_PROJECT_ROOT = Path(__file__).parent.parent.resolve() + + +def _resolve_path(env_var: str, default_relative: str) -> Path: + """Resolve a path from environment or default, ensuring it's absolute.""" + env_value = os.environ.get(env_var) + if env_value: + path = Path(env_value) + return path.resolve() if not path.is_absolute() else path + return (_PROJECT_ROOT / default_relative).resolve() + + +class Config: + """Application configuration""" + + # Database - always use absolute path + DATABASE_PATH = _resolve_path("DATABASE_PATH", "data/app.db") + + # Generated tests output - always use absolute path + GENERATED_TESTS_DIR = _resolve_path("GENERATED_TESTS_DIR", "generated_tests") + + # Default target URL for running tests + DEFAULT_TARGET_URL = os.environ.get("DEFAULT_TARGET_URL", "http://localhost:8001") + + # LLM configuration + # Supported providers: "mock", "openai", "anthropic" + LLM_PROVIDER = os.environ.get("LLM_PROVIDER", "mock") + + # API Keys (set the one for your chosen provider) + OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") + ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY") + + # Legacy support: LLM_API_KEY works as fallback + LLM_API_KEY = os.environ.get("LLM_API_KEY") + + # Model configuration + OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini") + ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_MODEL", "claude-3-haiku-20240307") + + # Check if real LLM is configured + @property + def USE_REAL_LLM(self) -> bool: + if self.LLM_PROVIDER == "mock": + return False + if self.LLM_PROVIDER == "openai": + return bool(self.OPENAI_API_KEY or self.LLM_API_KEY) + if self.LLM_PROVIDER == "anthropic": + return bool(self.ANTHROPIC_API_KEY or self.LLM_API_KEY) + return False + + # Test execution + TEST_TIMEOUT = int(os.environ.get("TEST_TIMEOUT", "300")) + + +config = Config() diff --git a/app/generator/__init__.py b/app/generator/__init__.py new file mode 100644 index 00000000..abd3df9a --- /dev/null +++ b/app/generator/__init__.py @@ -0,0 +1,32 @@ +# Generator package +from .llm_client import ( + LLMClient, + MockLLMClient, + OpenAIClient, + AnthropicClient, + RealLLMClient, + get_llm_client, + GeneratedTestCase, + GeneratedTestPlan, + TestCase, # Backwards-compatible alias + TestPlan, # Backwards-compatible alias +) +from .test_generator import PytestGenerator + +# Keep TestGenerator as alias for backwards compatibility +TestGenerator = PytestGenerator + +__all__ = [ + "LLMClient", + "MockLLMClient", + "OpenAIClient", + "AnthropicClient", + "RealLLMClient", + "get_llm_client", + "GeneratedTestCase", + "GeneratedTestPlan", + "TestCase", + "TestPlan", + "PytestGenerator", + "TestGenerator", +] diff --git a/app/generator/llm_client.py b/app/generator/llm_client.py new file mode 100644 index 00000000..7aeeaaaf --- /dev/null +++ b/app/generator/llm_client.py @@ -0,0 +1,638 @@ +""" +LLM Client - Abstract interface and implementations for test generation. 
+ +Provides: +- MockLLMClient: Deterministic local/CI usage (no API calls) +- OpenAIClient: OpenAI GPT integration +- AnthropicClient: Anthropic Claude integration +""" + +import json +import os +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Optional + +from app.openapi_parser import Endpoint, NormalizedSpec + + +@dataclass +class GeneratedTestCase: + """Represents a single test case to generate""" + + name: str + description: str + method: str + path: str + expected_status: int + request_body: Optional[dict] = None + path_params: Optional[dict] = None + query_params: Optional[dict] = None + assertions: list[str] = None + + def __post_init__(self): + if self.assertions is None: + self.assertions = [] + + +# Backwards-compatible alias (avoid name starting with 'Test' for pytest) +TestCase = GeneratedTestCase + + +@dataclass +class GeneratedTestPlan: + """Collection of test cases for an endpoint or spec""" + + endpoint_path: str + endpoint_method: str + test_cases: list[GeneratedTestCase] + + +# Backwards-compatible alias (avoid name starting with 'Test' for pytest) +TestPlan = GeneratedTestPlan + + +class LLMClient(ABC): + """Abstract base class for LLM clients""" + + @abstractmethod + def generate_test_plan(self, endpoint: Endpoint, spec: NormalizedSpec) -> GeneratedTestPlan: + """Generate a test plan for a single endpoint.""" + pass + + @abstractmethod + def generate_tests_for_spec(self, spec: NormalizedSpec) -> list[GeneratedTestPlan]: + """Generate test plans for all endpoints in a spec.""" + pass + + +# ============================================================================= +# Mock LLM Client (Default - No API calls) +# ============================================================================= + + +class MockLLMClient(LLMClient): + """ + Deterministic mock LLM client for local development and CI. + + Generates test cases based purely on endpoint structure, + without any randomness or external API calls. 
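+
+    For each endpoint it emits up to three cases: a happy-path request, a 404
+    check for routes with path parameters, and a 422 check for POST/PUT/PATCH
+    routes with a request body (the error cases only when the spec documents
+    that status code).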
+ """ + + def generate_test_plan(self, endpoint: Endpoint, spec: NormalizedSpec) -> GeneratedTestPlan: + """Generate deterministic test plan for an endpoint""" + test_cases = [] + + # Generate happy path test + happy_path = self._generate_happy_path(endpoint, spec) + if happy_path: + test_cases.append(happy_path) + + # Generate 404 test for endpoints with path parameters + not_found = self._generate_not_found_test(endpoint) + if not_found: + test_cases.append(not_found) + + # Generate validation error test for POST/PUT with request body + validation_error = self._generate_validation_error_test(endpoint) + if validation_error: + test_cases.append(validation_error) + + return GeneratedTestPlan( + endpoint_path=endpoint.path, + endpoint_method=endpoint.method, + test_cases=test_cases, + ) + + def generate_tests_for_spec(self, spec: NormalizedSpec) -> list[GeneratedTestPlan]: + """Generate test plans for all endpoints""" + return [self.generate_test_plan(ep, spec) for ep in spec.endpoints] + + def _generate_happy_path( + self, endpoint: Endpoint, spec: NormalizedSpec + ) -> Optional[GeneratedTestCase]: + """Generate a happy path test case""" + expected_status = 200 + for response in endpoint.responses: + if response.status_code in [200, 201, 204]: + expected_status = response.status_code + break + + path_params = {} + for param in endpoint.parameters: + if param.location == "path": + if param.example: + path_params[param.name] = param.example + else: + path_params[param.name] = self._generate_example_value( + param.schema_type + ) + + request_body = None + if endpoint.request_body: + request_body = self._generate_example_body( + endpoint.request_body.schema, spec.schemas + ) + + operation_id = ( + endpoint.operation_id + or f"{endpoint.method.lower()}_{endpoint.path.replace('/', '_').strip('_')}" + ) + test_name = f"test_{operation_id}_success" + + assertions = [f"assert response.status_code == {expected_status}"] + if expected_status != 204: + assertions.append("assert response.json() is not None") + + return GeneratedTestCase( + name=test_name, + description=f"Test successful {endpoint.method} {endpoint.path}", + method=endpoint.method, + path=endpoint.path, + expected_status=expected_status, + request_body=request_body, + path_params=path_params if path_params else None, + assertions=assertions, + ) + + def _generate_not_found_test(self, endpoint: Endpoint) -> Optional[GeneratedTestCase]: + """Generate 404 test for endpoints with path parameters""" + if endpoint.method not in ["GET", "PUT", "DELETE"]: + return None + + path_params = [p for p in endpoint.parameters if p.location == "path"] + if not path_params: + return None + + has_404 = any(r.status_code == 404 for r in endpoint.responses) + if not has_404: + return None + + operation_id = ( + endpoint.operation_id + or f"{endpoint.method.lower()}_{endpoint.path.replace('/', '_').strip('_')}" + ) + test_name = f"test_{operation_id}_not_found" + + fake_params = {p.name: "nonexistent-id-12345" for p in path_params} + + return GeneratedTestCase( + name=test_name, + description=f"Test {endpoint.method} {endpoint.path} with non-existent resource", + method=endpoint.method, + path=endpoint.path, + expected_status=404, + path_params=fake_params, + assertions=["assert response.status_code == 404"], + ) + + def _generate_validation_error_test(self, endpoint: Endpoint) -> Optional[GeneratedTestCase]: + """Generate validation error test for POST/PUT with required body""" + if endpoint.method not in ["POST", "PUT", "PATCH"]: + return None + + if not 
endpoint.request_body: + return None + + has_422 = any(r.status_code == 422 for r in endpoint.responses) + if not has_422: + return None + + operation_id = ( + endpoint.operation_id + or f"{endpoint.method.lower()}_{endpoint.path.replace('/', '_').strip('_')}" + ) + test_name = f"test_{operation_id}_validation_error" + + return GeneratedTestCase( + name=test_name, + description=f"Test {endpoint.method} {endpoint.path} with invalid/missing required fields", + method=endpoint.method, + path=endpoint.path, + expected_status=422, + request_body={}, + assertions=["assert response.status_code == 422"], + ) + + def _generate_example_value(self, schema_type: str) -> str: + """Generate an example value for a schema type""" + type_examples = { + "string": "test-string", + "integer": 1, + "number": 1.0, + "boolean": True, + "uuid": "123e4567-e89b-12d3-a456-426614174000", + } + return type_examples.get(schema_type, "test-value") + + def _generate_example_body(self, schema: dict, all_schemas: dict) -> dict: + """Generate an example request body from schema""" + if not schema: + return {} + + properties = schema.get("properties", {}) + required = schema.get("required", []) + + body = {} + for prop_name, prop_schema in properties.items(): + if prop_name in required or not required: + body[prop_name] = self._schema_to_value(prop_schema, all_schemas) + + return body + + def _schema_to_value(self, schema: dict, all_schemas: dict): + """Convert a schema to an example value""" + if not schema: + return None + + if "$ref" in schema: + ref_name = schema["$ref"].split("/")[-1] + if ref_name in all_schemas: + return self._generate_example_body(all_schemas[ref_name], all_schemas) + return {} + + if "example" in schema: + return schema["example"] + + schema_type = schema.get("type", "string") + + if schema_type == "object": + return self._generate_example_body(schema, all_schemas) + elif schema_type == "array": + items = schema.get("items", {}) + return [self._schema_to_value(items, all_schemas)] + elif schema_type == "string": + if schema.get("format") == "uuid": + return "123e4567-e89b-12d3-a456-426614174000" + return "test-string" + elif schema_type == "integer": + return 1 + elif schema_type == "number": + return 1.0 + elif schema_type == "boolean": + return True + + return None + + +# ============================================================================= +# Base Real LLM Client +# ============================================================================= + + +class BaseLLMClient(LLMClient): + """Base class for real LLM clients with shared functionality""" + + def _build_prompt(self, endpoint: Endpoint, spec: NormalizedSpec) -> str: + """Build the prompt for test generation""" + # Format endpoint info + params_str = "" + if endpoint.parameters: + params_list = [ + f" - {p.name} ({p.location}): {p.schema_type}, required={p.required}" + for p in endpoint.parameters + ] + params_str = "\n".join(params_list) + + request_body_str = "" + if endpoint.request_body and endpoint.request_body.schema: + request_body_str = json.dumps(endpoint.request_body.schema, indent=2) + + responses_str = "\n".join( + [f" - {r.status_code}: {r.description}" for r in endpoint.responses] + ) + + prompt = f"""Generate pytest test cases for this FastAPI endpoint. 
+ +API: {spec.title} v{spec.version} +Endpoint: {endpoint.method} {endpoint.path} +Operation ID: {endpoint.operation_id or 'N/A'} +Summary: {endpoint.summary or 'N/A'} + +Parameters: +{params_str or ' None'} + +Request Body Schema: +{request_body_str or ' None'} + +Responses: +{responses_str} + +IMPORTANT RULES: +1. This is a FastAPI application. Error responses use "detail" field, NOT "error" field. + Example: {{"detail": "Item not found"}} +2. Test names MUST follow this pattern: + - Happy path: test_{{operation_id}}_success + - Not found: test_{{operation_id}}_not_found + - Validation error: test_{{operation_id}}_validation_error +3. For GET/DELETE endpoints with path parameters (like /items/{{id}}): + - The "success" test should have path_params with a placeholder value like "test-id" + - Our test runner will automatically create a resource first and use its real ID +4. Health endpoint (/health) returns {{"status": "ok"}} +5. POST endpoints return 201 for successful creation, not 200 +6. Use ONLY these assertions - do not check for fields that aren't in the schema: + - assert response.status_code == + - assert response.json() is not None (for non-204 responses) + - assert "id" in response.json() (only for POST create responses) + +Generate test cases as JSON array with this structure: +[ + {{ + "name": "test_{{operation_id}}_success", + "description": "What this test verifies", + "expected_status": 200, + "path_params": {{"id": "test-id"}} or null, + "query_params": {{"param": "value"}} or null, + "request_body": {{"field": "value"}} or null, + "assertions": ["assert response.status_code == 200"] + }} +] + +Generate tests for: +1. Happy path (successful request) - name must contain "success" +2. Not found (404) for GET/PUT/DELETE with path params - name must contain "not_found" +3. 
Validation error (422) for POST/PUT with missing required fields - name must contain "validation_error" + +Return ONLY the JSON array, no other text.""" + + return prompt + + def _parse_llm_response( + self, response_text: str, endpoint: Endpoint + ) -> list[GeneratedTestCase]: + """Parse LLM response into TestCase objects""" + try: + # Try to extract JSON from response + text = response_text.strip() + + # Handle markdown code blocks + if "```json" in text: + text = text.split("```json")[1].split("```")[0] + elif "```" in text: + text = text.split("```")[1].split("```")[0] + + test_data = json.loads(text) + + if not isinstance(test_data, list): + test_data = [test_data] + + test_cases = [] + for i, tc in enumerate(test_data): + test_cases.append( + GeneratedTestCase( + name=tc.get("name", f"test_{endpoint.operation_id}_{i}"), + description=tc.get("description", ""), + method=endpoint.method, + path=endpoint.path, + expected_status=tc.get("expected_status", 200), + request_body=tc.get("request_body"), + path_params=tc.get("path_params"), + query_params=tc.get("query_params"), + assertions=tc.get( + "assertions", + [f"assert response.status_code == {tc.get('expected_status', 200)}"], + ), + ) + ) + + return test_cases + + except (json.JSONDecodeError, KeyError, TypeError) as e: + print(f"Warning: Failed to parse LLM response: {e}") + # Fall back to mock generation + mock = MockLLMClient() + plan = mock.generate_test_plan(endpoint, NormalizedSpec( + title="", version="", endpoints=[endpoint], schemas={} + )) + return plan.test_cases + + def generate_tests_for_spec(self, spec: NormalizedSpec) -> list[GeneratedTestPlan]: + """Generate test plans for all endpoints""" + return [self.generate_test_plan(ep, spec) for ep in spec.endpoints] + + +# ============================================================================= +# OpenAI Client +# ============================================================================= + + +class OpenAIClient(BaseLLMClient): + """ + OpenAI GPT client for test generation. + + Requires OPENAI_API_KEY environment variable. + """ + + def __init__( + self, + api_key: Optional[str] = None, + model: str = "gpt-4o-mini", + ): + self.api_key = api_key or os.environ.get("OPENAI_API_KEY") or os.environ.get("LLM_API_KEY") + self.model = model + + if not self.api_key: + raise ValueError("OPENAI_API_KEY is required for OpenAIClient") + + # Import here to make it optional + try: + from openai import OpenAI + self.client = OpenAI(api_key=self.api_key) + except ImportError: + raise ImportError( + "openai package is required. Install with: pip install openai" + ) + + def generate_test_plan(self, endpoint: Endpoint, spec: NormalizedSpec) -> GeneratedTestPlan: + """Generate test plan using OpenAI GPT""" + prompt = self._build_prompt(endpoint, spec) + + try: + response = self.client.chat.completions.create( + model=self.model, + messages=[ + { + "role": "system", + "content": "You are an expert API tester. Generate comprehensive pytest test cases for API endpoints. 
Always respond with valid JSON.", + }, + {"role": "user", "content": prompt}, + ], + temperature=0.3, + max_tokens=2000, + ) + + response_text = response.choices[0].message.content + test_cases = self._parse_llm_response(response_text, endpoint) + + return TestPlan( + endpoint_path=endpoint.path, + endpoint_method=endpoint.method, + test_cases=test_cases, + ) + + except Exception as e: + print(f"Warning: OpenAI API call failed: {e}") + # Fall back to mock + mock = MockLLMClient() + return mock.generate_test_plan(endpoint, spec) + + +# ============================================================================= +# Anthropic Client +# ============================================================================= + + +class AnthropicClient(BaseLLMClient): + """ + Anthropic Claude client for test generation. + + Requires ANTHROPIC_API_KEY environment variable. + """ + + def __init__( + self, + api_key: Optional[str] = None, + model: str = "claude-3-haiku-20240307", + ): + self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("LLM_API_KEY") + self.model = model + + if not self.api_key: + raise ValueError("ANTHROPIC_API_KEY is required for AnthropicClient") + + # Import here to make it optional + try: + import anthropic + self.client = anthropic.Anthropic(api_key=self.api_key) + except ImportError: + raise ImportError( + "anthropic package is required. Install with: pip install anthropic" + ) + + def generate_test_plan(self, endpoint: Endpoint, spec: NormalizedSpec) -> GeneratedTestPlan: + """Generate test plan using Anthropic Claude""" + prompt = self._build_prompt(endpoint, spec) + + try: + response = self.client.messages.create( + model=self.model, + max_tokens=2000, + messages=[ + { + "role": "user", + "content": f"You are an expert API tester. Generate comprehensive pytest test cases. Always respond with valid JSON only, no other text.\n\n{prompt}", + } + ], + ) + + response_text = response.content[0].text + test_cases = self._parse_llm_response(response_text, endpoint) + + return TestPlan( + endpoint_path=endpoint.path, + endpoint_method=endpoint.method, + test_cases=test_cases, + ) + + except Exception as e: + print(f"Warning: Anthropic API call failed: {e}") + # Fall back to mock + mock = MockLLMClient() + return mock.generate_test_plan(endpoint, spec) + + +# ============================================================================= +# Legacy RealLLMClient (for backwards compatibility) +# ============================================================================= + + +class RealLLMClient(BaseLLMClient): + """ + Legacy real LLM client - routes to OpenAI or Anthropic based on provider. + + Kept for backwards compatibility. + """ + + def __init__( + self, + api_key: Optional[str] = None, + provider: str = "openai", + model: Optional[str] = None, + ): + self.provider = provider.lower() + self.api_key = api_key + + if self.provider == "openai": + self._client = OpenAIClient(api_key=api_key, model=model or "gpt-4o-mini") + elif self.provider == "anthropic": + self._client = AnthropicClient(api_key=api_key, model=model or "claude-3-haiku-20240307") + else: + raise ValueError(f"Unknown provider: {provider}. 
Use 'openai' or 'anthropic'") + + def generate_test_plan(self, endpoint: Endpoint, spec: NormalizedSpec) -> GeneratedTestPlan: + return self._client.generate_test_plan(endpoint, spec) + + def generate_tests_for_spec(self, spec: NormalizedSpec) -> list[GeneratedTestPlan]: + return self._client.generate_tests_for_spec(spec) + + +# ============================================================================= +# Factory Function +# ============================================================================= + + +def get_llm_client( + provider: Optional[str] = None, + api_key: Optional[str] = None, + model: Optional[str] = None, +) -> LLMClient: + """ + Factory function to get the appropriate LLM client. + + Args: + provider: "mock", "openai", or "anthropic". Defaults to env LLM_PROVIDER or "mock" + api_key: API key for the provider. Defaults to env variable + model: Model name. Defaults to provider's default model + + Returns: + Configured LLM client + + Environment variables: + LLM_PROVIDER: Default provider (mock, openai, anthropic) + OPENAI_API_KEY: OpenAI API key + ANTHROPIC_API_KEY: Anthropic API key + LLM_API_KEY: Fallback API key for any provider + OPENAI_MODEL: Default OpenAI model + ANTHROPIC_MODEL: Default Anthropic model + """ + # Determine provider + if provider is None: + provider = os.environ.get("LLM_PROVIDER", "mock") + + provider = provider.lower() + + # Mock client (default) + if provider == "mock": + return MockLLMClient() + + # OpenAI client + if provider == "openai": + key = api_key or os.environ.get("OPENAI_API_KEY") or os.environ.get("LLM_API_KEY") + if not key: + print("Warning: No OpenAI API key found, falling back to mock") + return MockLLMClient() + mdl = model or os.environ.get("OPENAI_MODEL", "gpt-4o-mini") + return OpenAIClient(api_key=key, model=mdl) + + # Anthropic client + if provider == "anthropic": + key = api_key or os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("LLM_API_KEY") + if not key: + print("Warning: No Anthropic API key found, falling back to mock") + return MockLLMClient() + mdl = model or os.environ.get("ANTHROPIC_MODEL", "claude-3-haiku-20240307") + return AnthropicClient(api_key=key, model=mdl) + + # Unknown provider + print(f"Warning: Unknown provider '{provider}', falling back to mock") + return MockLLMClient() diff --git a/app/generator/test_generator.py b/app/generator/test_generator.py new file mode 100644 index 00000000..6cf15254 --- /dev/null +++ b/app/generator/test_generator.py @@ -0,0 +1,245 @@ +""" +Test Generator - Generate pytest files from OpenAPI specs. + +This module takes a normalized OpenAPI spec and generates +executable pytest test files. +""" + +from pathlib import Path +from typing import Optional + +from app.openapi_parser import NormalizedSpec +from .llm_client import LLMClient, MockLLMClient, TestPlan, TestCase + + +class PytestGenerator: + """ + Generates pytest test files from OpenAPI specifications. + """ + + def __init__( + self, llm_client: Optional[LLMClient] = None, output_dir: Optional[Path] = None + ): + """ + Initialize the test generator. + + Args: + llm_client: LLM client to use for generation. Defaults to MockLLMClient. + output_dir: Directory to write generated tests. Defaults to ./generated_tests + """ + self.llm_client = llm_client or MockLLMClient() + self.output_dir = output_dir or Path("./generated_tests") + + def generate(self, spec: NormalizedSpec, spec_id: str) -> list[Path]: + """ + Generate pytest files for a spec. 
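+
+        Writes a conftest.py with shared fixtures plus one
+        test_<resource>.py file per top-level path segment, all under
+        output_dir/<spec_id>.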
+ + Args: + spec: The normalized OpenAPI spec + spec_id: Unique identifier for this spec (used in directory name) + + Returns: + List of paths to generated test files + """ + # Create output directory + test_dir = self.output_dir / spec_id + test_dir.mkdir(parents=True, exist_ok=True) + + # Generate test plans + test_plans = self.llm_client.generate_tests_for_spec(spec) + + # Generate files + generated_files = [] + + # Generate conftest.py + conftest_path = test_dir / "conftest.py" + conftest_content = self._generate_conftest(spec) + conftest_path.write_text(conftest_content) + generated_files.append(conftest_path) + + # Group test plans by path for file organization + plans_by_resource = self._group_by_resource(test_plans) + + for resource_name, plans in plans_by_resource.items(): + test_file = test_dir / f"test_{resource_name}.py" + content = self._generate_test_file(plans, spec) + test_file.write_text(content) + generated_files.append(test_file) + + return generated_files + + def _group_by_resource(self, plans: list[TestPlan]) -> dict[str, list[TestPlan]]: + """Group test plans by resource (first path segment)""" + groups = {} + for plan in plans: + # Extract resource name from path + path_parts = plan.endpoint_path.strip("/").split("/") + resource = path_parts[0] if path_parts else "root" + resource = resource.replace("{", "").replace("}", "") + + if resource not in groups: + groups[resource] = [] + groups[resource].append(plan) + + return groups + + def _generate_conftest(self, spec: NormalizedSpec) -> str: + """Generate conftest.py with fixtures""" + base_url = spec.base_url or "http://localhost:8001" + + return f'''""" +Pytest fixtures for {spec.title} tests. +Auto-generated from OpenAPI spec. +""" +import os +import pytest +import httpx + + +@pytest.fixture +def base_url(): + """Base URL for the API under test""" + return os.environ.get("TARGET_BASE_URL", "{base_url}") + + +@pytest.fixture +def client(base_url): + """HTTP client for making requests""" + with httpx.Client(base_url=base_url, timeout=30.0) as client: + yield client + + +@pytest.fixture +def auth_headers(): + """Optional authentication headers""" + token = os.environ.get("API_TOKEN") + if token: + return {{"Authorization": f"Bearer {{token}}"}} + return {{}} +''' + + def _generate_test_file(self, plans: list[TestPlan], spec: NormalizedSpec) -> str: + """Generate a test file for a set of test plans""" + lines = [ + '"""', + f"Auto-generated tests for {spec.title}", + '"""', + "import pytest", + "", + "", + ] + + for plan in plans: + for test_case in plan.test_cases: + lines.extend(self._generate_test_function(test_case)) + lines.append("") + + return "\n".join(lines) + + def _generate_test_function(self, test_case: TestCase) -> list[str]: + """Generate a single test function""" + lines = [] + + # Function definition + lines.append(f"def {test_case.name}(client):") + lines.append(' """') + lines.append(f" {test_case.description}") + lines.append(' """') + + # Check if this is a success test for GET/DELETE with path params + # These need to create the resource first + test_name_lower = test_case.name.lower() + is_error_test = any( + term in test_name_lower + for term in ["not_found", "notfound", "error", "invalid", "404", "422"] + ) + needs_setup = ( + test_case.path_params + and test_case.method in ["GET", "DELETE"] + and not is_error_test + and test_case.expected_status in [200, 204] + ) + + if needs_setup: + # Generate setup code to create the resource first + lines.extend(self._generate_resource_setup(test_case)) + 
else: + # Build the request path directly + path = test_case.path + if test_case.path_params: + for param_name, param_value in test_case.path_params.items(): + path = path.replace(f"{{{param_name}}}", f"{param_value}") + lines.append(f' url = "{path}"') + else: + lines.append(f' url = "{path}"') + + # Make the request + method = test_case.method.lower() + if test_case.request_body is not None: + body_str = repr(test_case.request_body) + lines.append(f" response = client.{method}(url, json={body_str})") + else: + lines.append(f" response = client.{method}(url)") + + # Add assertions + lines.append("") + for assertion in test_case.assertions: + lines.append(f" {assertion}") + + return lines + + def _generate_resource_setup(self, test_case: TestCase) -> list[str]: + """Generate setup code to create a resource before GET/DELETE tests""" + lines = [] + + # Extract the collection path (e.g., /items from /items/{id}) + path_parts = test_case.path.split("/") + # Find the part with {param} and take everything before it + collection_parts = [] + for part in path_parts: + if "{" in part: + break + collection_parts.append(part) + collection_path = "/".join(collection_parts) + + # First create the resource + lines.append(f' # First, create a resource to {test_case.method.lower()}') + lines.append(f' create_response = client.post("{collection_path}", json={{"name": "Test Item", "price": 9.99}})') + lines.append(' assert create_response.status_code in [200, 201], f"Setup failed: {create_response.text}"') + lines.append(' created = create_response.json()') + lines.append(' resource_id = created.get("id", created.get("_id", ""))') + lines.append("") + + # Now make the actual request using the created ID + # Replace path param with the dynamic ID + path_template = test_case.path + for param_name in test_case.path_params.keys(): + path_template = path_template.replace(f"{{{param_name}}}", "{resource_id}") + + lines.append(f' url = f"{path_template}"') + method = test_case.method.lower() + lines.append(f" response = client.{method}(url)") + + return lines + + +def generate_tests( + spec: NormalizedSpec, + spec_id: str, + output_dir: Optional[Path] = None, + llm_client: Optional[LLMClient] = None, +) -> list[Path]: + """ + Convenience function to generate tests. + + Args: + spec: The normalized OpenAPI spec + spec_id: Unique identifier for this spec + output_dir: Directory to write tests + llm_client: LLM client to use + + Returns: + List of generated file paths + """ + generator = PytestGenerator(llm_client=llm_client, output_dir=output_dir) + return generator.generate(spec, spec_id) diff --git a/app/main.py b/app/main.py new file mode 100644 index 00000000..3f78651b --- /dev/null +++ b/app/main.py @@ -0,0 +1,33 @@ +""" +Main Application - FastAPI entry point for the OpenAPI Test Generator. 
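+
+Run locally with: uvicorn app.main:app --reload --port 8000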
+""" + +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware + +from app.routes import router + + +app = FastAPI( + title="OpenAPI Test Generator", + description="Generate and run pytest tests from OpenAPI specifications", + version="1.0.0", +) + +# Add CORS middleware for Streamlit integration +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Include routes +app.include_router(router) + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/app/openapi_parser.py b/app/openapi_parser.py new file mode 100644 index 00000000..334fa686 --- /dev/null +++ b/app/openapi_parser.py @@ -0,0 +1,365 @@ +""" +OpenAPI Parser - Parse and normalize OpenAPI 3.x specs. + +This module provides functionality to parse OpenAPI specifications +from YAML or JSON format and normalize them into a consistent internal model. +""" + +import json +import yaml +from dataclasses import dataclass, field +from typing import Any, Optional + + +class OpenAPIParseError(Exception): + """Raised when parsing an OpenAPI spec fails""" + + pass + + +@dataclass +class Parameter: + """Represents an API parameter (path, query, header, cookie)""" + + name: str + location: str # path, query, header, cookie + required: bool = False + schema_type: str = "string" + description: Optional[str] = None + example: Any = None + + +@dataclass +class RequestBody: + """Represents a request body schema""" + + content_type: str = "application/json" + schema: dict = field(default_factory=dict) + required: bool = False + example: Any = None + + +@dataclass +class Response: + """Represents an API response""" + + status_code: int + description: str = "" + schema: Optional[dict] = None + example: Any = None + + +@dataclass +class Endpoint: + """Represents a single API endpoint""" + + path: str + method: str + operation_id: Optional[str] = None + summary: Optional[str] = None + description: Optional[str] = None + tags: list[str] = field(default_factory=list) + parameters: list[Parameter] = field(default_factory=list) + request_body: Optional[RequestBody] = None + responses: list[Response] = field(default_factory=list) + + +@dataclass +class NormalizedSpec: + """Normalized OpenAPI specification""" + + title: str + version: str + description: Optional[str] = None + base_url: Optional[str] = None + endpoints: list[Endpoint] = field(default_factory=list) + schemas: dict = field(default_factory=dict) + + +def parse_spec(content: str) -> dict: + """ + Parse OpenAPI spec from YAML or JSON string. + + Args: + content: The raw spec content as a string + + Returns: + Parsed spec as a dictionary + + Raises: + OpenAPIParseError: If the content cannot be parsed + """ + if not content or not content.strip(): + raise OpenAPIParseError("Empty spec content") + + content = content.strip() + + # Try JSON first + if content.startswith("{"): + try: + return json.loads(content) + except json.JSONDecodeError as e: + raise OpenAPIParseError(f"Invalid JSON: {e}") + + # Try YAML + try: + result = yaml.safe_load(content) + if not isinstance(result, dict): + raise OpenAPIParseError("Spec must be a YAML/JSON object") + return result + except yaml.YAMLError as e: + raise OpenAPIParseError(f"Invalid YAML: {e}") + + +def _resolve_ref(ref: str, spec: dict) -> dict: + """ + Resolve a $ref pointer in the spec. 
+ + Args: + ref: The $ref string (e.g., "#/components/schemas/Item") + spec: The full spec to resolve against + + Returns: + The resolved schema + """ + if not ref.startswith("#/"): + return {} + + parts = ref[2:].split("/") + result = spec + for part in parts: + if isinstance(result, dict) and part in result: + result = result[part] + else: + return {} + return result if isinstance(result, dict) else {} + + +def _resolve_schema(schema: dict, spec: dict) -> dict: + """ + Recursively resolve $ref in a schema. + + Args: + schema: The schema that may contain $ref + spec: The full spec to resolve against + + Returns: + The resolved schema + """ + if not schema: + return {} + + if "$ref" in schema: + resolved = _resolve_ref(schema["$ref"], spec) + # Merge any additional properties from the original schema + result = resolved.copy() + for key, value in schema.items(): + if key != "$ref": + result[key] = value + return result + + return schema + + +def _parse_parameter(param_data: dict, spec: dict) -> Parameter: + """Parse a parameter definition""" + # Resolve if it's a reference + if "$ref" in param_data: + param_data = _resolve_ref(param_data["$ref"], spec) + + schema = param_data.get("schema", {}) + if "$ref" in schema: + schema = _resolve_ref(schema["$ref"], spec) + + return Parameter( + name=param_data.get("name", ""), + location=param_data.get("in", "query"), + required=param_data.get("required", False), + schema_type=schema.get("type", "string"), + description=param_data.get("description"), + example=param_data.get("example") or schema.get("example"), + ) + + +def _parse_request_body(body_data: dict, spec: dict) -> Optional[RequestBody]: + """Parse a request body definition""" + if not body_data: + return None + + # Resolve if it's a reference + if "$ref" in body_data: + body_data = _resolve_ref(body_data["$ref"], spec) + + content = body_data.get("content", {}) + + # Prefer application/json + if "application/json" in content: + content_type = "application/json" + media_type = content["application/json"] + elif content: + content_type = next(iter(content)) + media_type = content[content_type] + else: + return None + + schema = media_type.get("schema", {}) + schema = _resolve_schema(schema, spec) + + return RequestBody( + content_type=content_type, + schema=schema, + required=body_data.get("required", False), + example=media_type.get("example"), + ) + + +def _parse_responses(responses_data: dict, spec: dict) -> list[Response]: + """Parse response definitions""" + responses = [] + + for status_code, response_data in responses_data.items(): + # Resolve if it's a reference + if "$ref" in response_data: + response_data = _resolve_ref(response_data["$ref"], spec) + + # Get schema from content + schema = None + example = None + content = response_data.get("content", {}) + if "application/json" in content: + media_type = content["application/json"] + schema = _resolve_schema(media_type.get("schema", {}), spec) + example = media_type.get("example") + + try: + code = int(status_code) + except ValueError: + # Handle 'default' or other non-numeric status codes + code = 0 + + responses.append( + Response( + status_code=code, + description=response_data.get("description", ""), + schema=schema, + example=example, + ) + ) + + return responses + + +def _parse_endpoint(path: str, method: str, operation: dict, spec: dict) -> Endpoint: + """Parse a single endpoint operation""" + # Collect parameters from both path level and operation level + parameters = [] + for param in operation.get("parameters", []): + 
parameters.append(_parse_parameter(param, spec)) + + return Endpoint( + path=path, + method=method.upper(), + operation_id=operation.get("operationId"), + summary=operation.get("summary"), + description=operation.get("description"), + tags=operation.get("tags", []), + parameters=parameters, + request_body=_parse_request_body(operation.get("requestBody"), spec), + responses=_parse_responses(operation.get("responses", {}), spec), + ) + + +def normalize_spec(raw_spec: dict) -> NormalizedSpec: + """ + Normalize a parsed OpenAPI spec into our internal model. + + Args: + raw_spec: The parsed spec dictionary + + Returns: + NormalizedSpec with all endpoints extracted + + Raises: + OpenAPIParseError: If the spec is invalid + """ + # Validate basic structure + if not isinstance(raw_spec, dict): + raise OpenAPIParseError("Spec must be an object") + + # Check OpenAPI version + openapi_version = raw_spec.get("openapi", "") + if not openapi_version.startswith("3."): + swagger_version = raw_spec.get("swagger", "") + if swagger_version: + raise OpenAPIParseError( + f"Swagger/OpenAPI 2.x not supported, found version {swagger_version}" + ) + if not openapi_version: + raise OpenAPIParseError("Missing 'openapi' version field") + raise OpenAPIParseError(f"Unsupported OpenAPI version: {openapi_version}") + + # Get info + info = raw_spec.get("info", {}) + if not info.get("title"): + raise OpenAPIParseError("Missing required field: info.title") + if not info.get("version"): + raise OpenAPIParseError("Missing required field: info.version") + + # Get base URL from servers + base_url = None + servers = raw_spec.get("servers", []) + if servers and isinstance(servers, list) and servers[0].get("url"): + base_url = servers[0]["url"] + + # Extract schemas + schemas = raw_spec.get("components", {}).get("schemas", {}) + + # Parse all endpoints + endpoints = [] + paths = raw_spec.get("paths", {}) + http_methods = ["get", "post", "put", "patch", "delete", "head", "options"] + + # Sort paths for deterministic ordering + for path in sorted(paths.keys()): + path_item = paths[path] + if not isinstance(path_item, dict): + continue + + # Get path-level parameters + path_params = path_item.get("parameters", []) + + # Sort methods for deterministic ordering + for method in sorted(http_methods): + if method in path_item: + operation = path_item[method] + if isinstance(operation, dict): + # Merge path-level parameters with operation parameters + if path_params: + op_params = operation.get("parameters", []) + operation = operation.copy() + operation["parameters"] = path_params + op_params + + endpoints.append(_parse_endpoint(path, method, operation, raw_spec)) + + return NormalizedSpec( + title=info["title"], + version=info["version"], + description=info.get("description"), + base_url=base_url, + endpoints=endpoints, + schemas=schemas, + ) + + +def parse_and_normalize(content: str) -> NormalizedSpec: + """ + Convenience function to parse and normalize in one step. + + Args: + content: Raw spec content as YAML or JSON string + + Returns: + NormalizedSpec + """ + raw = parse_spec(content) + return normalize_spec(raw) diff --git a/app/routes.py b/app/routes.py new file mode 100644 index 00000000..16021b98 --- /dev/null +++ b/app/routes.py @@ -0,0 +1,451 @@ +""" +API Routes - FastAPI endpoints for the test generator service. 
+""" + +import json +from pathlib import Path +from typing import Optional + +from fastapi import APIRouter, HTTPException, status +from fastapi.responses import Response +from pydantic import BaseModel + +from app.config import config +from app.openapi_parser import parse_and_normalize, OpenAPIParseError +from app.generator import TestGenerator, get_llm_client +from app.runner import run_tests +from app.storage.db import get_database + + +router = APIRouter() + + +# Request/Response models +class SpecCreate(BaseModel): + """Request to create a spec""" + + name: str + content: str + + +class SpecResponse(BaseModel): + """Response for spec operations""" + + id: str + name: str + title: str + version: str + endpoint_count: int + created_at: str + + +class SpecDetailResponse(SpecResponse): + """Detailed spec response with endpoints""" + + endpoints: list[dict] + + +class GenerateRequest(BaseModel): + """Request to generate tests""" + + spec_id: str + + +class GenerationResponse(BaseModel): + """Response for generation operations""" + + id: str + spec_id: str + status: str + files: list[str] + created_at: str + + +class RunRequest(BaseModel): + """Request to run tests""" + + generation_id: str + target_url: Optional[str] = None + + +class FailureInfo(BaseModel): + """Information about a test failure""" + + name: str + classname: str + message: Optional[str] = None + traceback: Optional[str] = None + + +class RunResponse(BaseModel): + """Response for run operations""" + + id: str + generation_id: str + status: str + passed: int + failed: int + skipped: int + errors: int + total: int + duration: float + created_at: str + completed_at: Optional[str] = None + + +class RunDetailResponse(RunResponse): + """Detailed run response with failures""" + + failures: list[FailureInfo] + + +# Health endpoint +@router.get("/health") +def health_check(): + """Health check endpoint""" + return {"status": "ok"} + + +# Spec endpoints +@router.post("/specs", response_model=SpecResponse, status_code=status.HTTP_201_CREATED) +def create_spec(request: SpecCreate): + """Upload and store an OpenAPI specification""" + try: + # Parse and normalize the spec + normalized = parse_and_normalize(request.content) + + # Store in database + db = get_database(config.DATABASE_PATH) + spec = db.create_spec( + name=request.name, + raw_content=request.content, + title=normalized.title, + version=normalized.version, + endpoint_count=len(normalized.endpoints), + ) + + return SpecResponse( + id=spec.id, + name=spec.name, + title=spec.title, + version=spec.version, + endpoint_count=spec.endpoint_count, + created_at=spec.created_at.isoformat(), + ) + + except OpenAPIParseError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid OpenAPI spec: {e}" + ) + + +@router.get("/specs", response_model=list[SpecResponse]) +def list_specs(): + """List all stored specifications""" + db = get_database(config.DATABASE_PATH) + specs = db.list_specs() + + return [ + SpecResponse( + id=s.id, + name=s.name, + title=s.title, + version=s.version, + endpoint_count=s.endpoint_count, + created_at=s.created_at.isoformat(), + ) + for s in specs + ] + + +@router.get("/specs/{spec_id}", response_model=SpecDetailResponse) +def get_spec(spec_id: str): + """Get detailed information about a specification""" + db = get_database(config.DATABASE_PATH) + spec = db.get_spec(spec_id) + + if not spec: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Spec with id '{spec_id}' not found", + ) + + # Parse to get 
endpoints + normalized = parse_and_normalize(spec.raw_content) + endpoints = [ + { + "path": ep.path, + "method": ep.method, + "operation_id": ep.operation_id, + "summary": ep.summary, + } + for ep in normalized.endpoints + ] + + return SpecDetailResponse( + id=spec.id, + name=spec.name, + title=spec.title, + version=spec.version, + endpoint_count=spec.endpoint_count, + created_at=spec.created_at.isoformat(), + endpoints=endpoints, + ) + + +@router.delete("/specs/{spec_id}", status_code=status.HTTP_204_NO_CONTENT) +def delete_spec(spec_id: str): + """Delete a specification""" + db = get_database(config.DATABASE_PATH) + if not db.delete_spec(spec_id): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Spec with id '{spec_id}' not found", + ) + + +# Generation endpoints +@router.post( + "/generate", response_model=GenerationResponse, status_code=status.HTTP_201_CREATED +) +def generate_tests(request: GenerateRequest): + """Generate pytest files from a specification""" + db = get_database(config.DATABASE_PATH) + + # Get the spec + spec = db.get_spec(request.spec_id) + if not spec: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Spec with id '{request.spec_id}' not found", + ) + + try: + # Parse the spec + normalized = parse_and_normalize(spec.raw_content) + + # Generate tests + generator = TestGenerator( + llm_client=get_llm_client(), output_dir=config.GENERATED_TESTS_DIR + ) + generated_files = generator.generate(normalized, request.spec_id) + + # Store generation record + generation = db.create_generation( + spec_id=request.spec_id, + test_dir=str(config.GENERATED_TESTS_DIR / request.spec_id), + files=[str(f) for f in generated_files], + ) + + return GenerationResponse( + id=generation.id, + spec_id=generation.spec_id, + status=generation.status, + files=[str(f) for f in generated_files], + created_at=generation.created_at.isoformat(), + ) + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Failed to generate tests: {e}", + ) + + +@router.get("/generations/{generation_id}", response_model=GenerationResponse) +def get_generation(generation_id: str): + """Get information about a test generation""" + db = get_database(config.DATABASE_PATH) + generation = db.get_generation(generation_id) + + if not generation: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Generation with id '{generation_id}' not found", + ) + + return GenerationResponse( + id=generation.id, + spec_id=generation.spec_id, + status=generation.status, + files=generation.files, + created_at=generation.created_at.isoformat(), + ) + + +@router.get("/generations", response_model=list[GenerationResponse]) +def list_generations(spec_id: Optional[str] = None): + """List all generations, optionally filtered by spec_id""" + db = get_database(config.DATABASE_PATH) + generations = db.list_generations(spec_id) + + return [ + GenerationResponse( + id=g.id, + spec_id=g.spec_id, + status=g.status, + files=g.files, + created_at=g.created_at.isoformat(), + ) + for g in generations + ] + + +# Run endpoints +@router.post("/runs", response_model=RunResponse, status_code=status.HTTP_201_CREATED) +def create_run(request: RunRequest): + """Execute generated tests and return results""" + db = get_database(config.DATABASE_PATH) + + # Get the generation + generation = db.get_generation(request.generation_id) + if not generation: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Generation with 
id '{request.generation_id}' not found", + ) + + if generation.status != "completed": + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Generation is not completed", + ) + + target_url = request.target_url or config.DEFAULT_TARGET_URL + + # Create run record + run = db.create_run(generation_id=request.generation_id, target_url=target_url) + + # Execute tests + test_dir = Path(generation.test_dir) + result = run_tests( + test_dir=test_dir, base_url=target_url, timeout=config.TEST_TIMEOUT + ) + + # Update run with results + status_str = "completed" if result.success else "failed" + db.update_run( + run_id=run.id, + status=status_str, + passed=result.passed, + failed=result.failed, + skipped=result.skipped, + errors=result.errors, + total=result.total, + duration=result.duration, + junit_xml_path=str(result.junit_xml_path) if result.junit_xml_path else None, + results_json=json.dumps(result.to_dict()), + ) + + # Get updated run + run = db.get_run(run.id) + + return RunResponse( + id=run.id, + generation_id=run.generation_id, + status=run.status, + passed=run.passed, + failed=run.failed, + skipped=run.skipped, + errors=run.errors, + total=run.total, + duration=run.duration, + created_at=run.created_at.isoformat(), + completed_at=run.completed_at.isoformat() if run.completed_at else None, + ) + + +@router.get("/runs/{run_id}", response_model=RunDetailResponse) +def get_run(run_id: str): + """Get detailed results of a test run""" + db = get_database(config.DATABASE_PATH) + run = db.get_run(run_id) + + if not run: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Run with id '{run_id}' not found", + ) + + # Parse failures from results_json + failures = [] + if run.results_json: + try: + results = json.loads(run.results_json) + for f in results.get("failures", []): + failures.append( + FailureInfo( + name=f.get("name", ""), + classname=f.get("classname", ""), + message=f.get("message"), + traceback=f.get("traceback"), + ) + ) + except json.JSONDecodeError: + pass + + return RunDetailResponse( + id=run.id, + generation_id=run.generation_id, + status=run.status, + passed=run.passed, + failed=run.failed, + skipped=run.skipped, + errors=run.errors, + total=run.total, + duration=run.duration, + created_at=run.created_at.isoformat(), + completed_at=run.completed_at.isoformat() if run.completed_at else None, + failures=failures, + ) + + +@router.get("/runs/{run_id}/junit") +def get_junit_xml(run_id: str): + """Get raw JUnit XML for a run""" + db = get_database(config.DATABASE_PATH) + run = db.get_run(run_id) + + if not run: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Run with id '{run_id}' not found", + ) + + if not run.junit_xml_path: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="JUnit XML not available for this run", + ) + + junit_path = Path(run.junit_xml_path) + if not junit_path.exists(): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, detail="JUnit XML file not found" + ) + + return Response(content=junit_path.read_text(), media_type="application/xml") + + +@router.get("/runs", response_model=list[RunResponse]) +def list_runs(generation_id: Optional[str] = None): + """List all runs, optionally filtered by generation_id""" + db = get_database(config.DATABASE_PATH) + runs = db.list_runs(generation_id) + + return [ + RunResponse( + id=r.id, + generation_id=r.generation_id, + status=r.status, + passed=r.passed, + failed=r.failed, + skipped=r.skipped, + errors=r.errors, + 
            total=r.total,
+            duration=r.duration,
+            created_at=r.created_at.isoformat(),
+            completed_at=r.completed_at.isoformat() if r.completed_at else None,
+        )
+        for r in runs
+    ]
diff --git a/app/runner/__init__.py b/app/runner/__init__.py
new file mode 100644
index 00000000..48c92649
--- /dev/null
+++ b/app/runner/__init__.py
@@ -0,0 +1,8 @@
+# Runner package
+from .pytest_runner import run_tests, RunResult
+from .junit_parser import parse_junit_xml, JUnitReport, JUnitTestCase
+
+# Keep TestCaseResult as alias for backwards compatibility
+TestCaseResult = JUnitTestCase
+
+__all__ = ["run_tests", "RunResult", "parse_junit_xml", "JUnitReport", "JUnitTestCase", "TestCaseResult"]
diff --git a/app/runner/junit_parser.py b/app/runner/junit_parser.py
new file mode 100644
index 00000000..0d87266d
--- /dev/null
+++ b/app/runner/junit_parser.py
@@ -0,0 +1,199 @@
+"""
+JUnit XML Parser - Parse pytest JUnit XML output.
+
+Extracts test results, failures, and metrics from JUnit XML format.
+"""
+
+import xml.etree.ElementTree as ET
+from dataclasses import dataclass, field
+from typing import Optional
+from pathlib import Path
+
+
+@dataclass
+class JUnitTestCase:
+    """Result of a single test case"""
+
+    name: str
+    classname: str
+    time: float
+    status: str  # "passed", "failed", "skipped", "error"
+    failure_message: Optional[str] = None
+    failure_type: Optional[str] = None
+    failure_text: Optional[str] = None
+    stdout: Optional[str] = None
+    stderr: Optional[str] = None
+
+
+@dataclass
+class TestSuiteResult:
+    """Result of a test suite"""
+
+    name: str
+    tests: int
+    failures: int
+    errors: int
+    skipped: int
+    time: float
+    test_cases: list[JUnitTestCase] = field(default_factory=list)
+
+
+@dataclass
+class JUnitReport:
+    """Complete JUnit report"""
+
+    suites: list[TestSuiteResult] = field(default_factory=list)
+    total_tests: int = 0
+    total_passed: int = 0
+    total_failures: int = 0
+    total_errors: int = 0
+    total_skipped: int = 0
+    total_time: float = 0.0
+
+    @property
+    def success(self) -> bool:
+        """True if all tests passed"""
+        return self.total_failures == 0 and self.total_errors == 0
+
+
+def parse_junit_xml(xml_content: str) -> JUnitReport:
+    """
+    Parse JUnit XML content into a structured report.
+
+    Args:
+        xml_content: Raw XML content as string
+
+    Returns:
+        JUnitReport with parsed results
+    """
+    try:
+        root = ET.fromstring(xml_content)
+    except ET.ParseError as e:
+        raise ValueError(f"Invalid XML: {e}")
+
+    suites = []
+
+    # Handle both <testsuites> and <testsuite> as root
+    if root.tag == "testsuites":
+        suite_elements = root.findall("testsuite")
+    elif root.tag == "testsuite":
+        suite_elements = [root]
+    else:
+        raise ValueError(f"Unexpected root element: {root.tag}")
+
+    for suite_elem in suite_elements:
+        suite = _parse_suite(suite_elem)
+        suites.append(suite)
+
+    # Calculate totals
+    total_tests = sum(s.tests for s in suites)
+    total_failures = sum(s.failures for s in suites)
+    total_errors = sum(s.errors for s in suites)
+    total_skipped = sum(s.skipped for s in suites)
+    total_time = sum(s.time for s in suites)
+    total_passed = total_tests - total_failures - total_errors - total_skipped
+
+    return JUnitReport(
+        suites=suites,
+        total_tests=total_tests,
+        total_passed=total_passed,
+        total_failures=total_failures,
+        total_errors=total_errors,
+        total_skipped=total_skipped,
+        total_time=total_time,
+    )
+
+
+def parse_junit_file(file_path: Path) -> JUnitReport:
+    """
+    Parse JUnit XML from a file.
+ + Args: + file_path: Path to the JUnit XML file + + Returns: + JUnitReport with parsed results + """ + content = file_path.read_text() + return parse_junit_xml(content) + + +def _parse_suite(elem: ET.Element) -> TestSuiteResult: + """Parse a testsuite element""" + test_cases = [] + for tc_elem in elem.findall("testcase"): + test_cases.append(_parse_testcase(tc_elem)) + + return TestSuiteResult( + name=elem.get("name", ""), + tests=int(elem.get("tests", 0)), + failures=int(elem.get("failures", 0)), + errors=int(elem.get("errors", 0)), + skipped=int(elem.get("skipped", 0)), + time=float(elem.get("time", 0)), + test_cases=test_cases, + ) + + +def _parse_testcase(elem: ET.Element) -> JUnitTestCase: + """Parse a testcase element""" + status = "passed" + failure_message = None + failure_type = None + failure_text = None + + # Check for failure + failure = elem.find("failure") + if failure is not None: + status = "failed" + failure_message = failure.get("message", "") + failure_type = failure.get("type", "") + failure_text = failure.text + + # Check for error + error = elem.find("error") + if error is not None: + status = "error" + failure_message = error.get("message", "") + failure_type = error.get("type", "") + failure_text = error.text + + # Check for skipped + skipped = elem.find("skipped") + if skipped is not None: + status = "skipped" + failure_message = skipped.get("message", "") + + # Get stdout/stderr + stdout_elem = elem.find("system-out") + stderr_elem = elem.find("system-err") + + return JUnitTestCase( + name=elem.get("name", ""), + classname=elem.get("classname", ""), + time=float(elem.get("time", 0)), + status=status, + failure_message=failure_message, + failure_type=failure_type, + failure_text=failure_text, + stdout=stdout_elem.text if stdout_elem is not None else None, + stderr=stderr_elem.text if stderr_elem is not None else None, + ) + + +def get_failures(report: JUnitReport) -> list[JUnitTestCase]: + """ + Extract all failed test cases from a report. + + Args: + report: The JUnit report + + Returns: + List of failed test cases + """ + failures = [] + for suite in report.suites: + for tc in suite.test_cases: + if tc.status in ("failed", "error"): + failures.append(tc) + return failures diff --git a/app/runner/pytest_runner.py b/app/runner/pytest_runner.py new file mode 100644 index 00000000..fda5019a --- /dev/null +++ b/app/runner/pytest_runner.py @@ -0,0 +1,206 @@ +""" +Pytest Runner - Execute pytest tests programmatically. + +Runs generated tests and captures results including JUnit XML output. 
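+
+Example usage (a sketch; the test directory path is illustrative):
+
+    from pathlib import Path
+    result = run_tests(Path("generated_tests/<spec-id>"), base_url="http://localhost:8001")
+    print(result.passed, result.failed, result.total)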
+""" + +import os +import subprocess +import time +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + +from .junit_parser import parse_junit_file, get_failures + + +@dataclass +class FailedTest: + """Information about a failed test""" + + name: str + classname: str + message: Optional[str] = None + traceback: Optional[str] = None + + +@dataclass +class RunResult: + """Result of a test run""" + + success: bool + exit_code: int + passed: int = 0 + failed: int = 0 + skipped: int = 0 + errors: int = 0 + total: int = 0 + duration: float = 0.0 + junit_xml_path: Optional[Path] = None + stdout: str = "" + stderr: str = "" + failures: list[FailedTest] = field(default_factory=list) + + def to_dict(self) -> dict: + """Convert to dictionary for JSON serialization""" + return { + "success": self.success, + "exit_code": self.exit_code, + "passed": self.passed, + "failed": self.failed, + "skipped": self.skipped, + "errors": self.errors, + "total": self.total, + "duration": self.duration, + "junit_xml_path": str(self.junit_xml_path) if self.junit_xml_path else None, + "failures": [ + { + "name": f.name, + "classname": f.classname, + "message": f.message, + "traceback": f.traceback, + } + for f in self.failures + ], + } + + +def run_tests( + test_dir: Path, + base_url: str, + output_dir: Optional[Path] = None, + timeout: int = 300, + verbose: bool = True, +) -> RunResult: + """ + Run pytest on a test directory. + + Args: + test_dir: Directory containing test files + base_url: Base URL for the API under test + output_dir: Directory to store artifacts (defaults to test_dir) + timeout: Maximum execution time in seconds + verbose: Enable verbose output + + Returns: + RunResult with test execution details + """ + if not test_dir.exists(): + return RunResult( + success=False, + exit_code=1, + stdout="", + stderr=f"Test directory does not exist: {test_dir}", + ) + + output_dir = output_dir or test_dir + output_dir.mkdir(parents=True, exist_ok=True) + + junit_xml_path = output_dir / "junit.xml" + + # Build pytest command + cmd = [ + "python", + "-m", + "pytest", + str(test_dir), + f"--junitxml={junit_xml_path}", + "--tb=short", + ] + + if verbose: + cmd.append("-v") + + # Set up environment + env = os.environ.copy() + env["TARGET_BASE_URL"] = base_url + env["PYTHONPATH"] = str(test_dir.parent) + + # Run pytest + start_time = time.time() + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=timeout, + env=env, + cwd=str(test_dir.parent), + ) + exit_code = result.returncode + stdout = result.stdout + stderr = result.stderr + except subprocess.TimeoutExpired: + return RunResult( + success=False, + exit_code=-1, + stdout="", + stderr=f"Test execution timed out after {timeout} seconds", + duration=timeout, + ) + except Exception as e: + return RunResult( + success=False, exit_code=-1, stdout="", stderr=f"Failed to run tests: {e}" + ) + + duration = time.time() - start_time + + # Parse JUnit XML if it exists + passed = 0 + failed = 0 + skipped = 0 + errors = 0 + total = 0 + failures = [] + + if junit_xml_path.exists(): + try: + report = parse_junit_file(junit_xml_path) + passed = report.total_passed + failed = report.total_failures + skipped = report.total_skipped + errors = report.total_errors + total = report.total_tests + + # Extract failure details + for tc in get_failures(report): + failures.append( + FailedTest( + name=tc.name, + classname=tc.classname, + message=tc.failure_message, + traceback=tc.failure_text, + ) + ) + except 
Exception as e: + stderr += f"\nFailed to parse JUnit XML: {e}" + + return RunResult( + success=exit_code == 0, + exit_code=exit_code, + passed=passed, + failed=failed, + skipped=skipped, + errors=errors, + total=total, + duration=duration, + junit_xml_path=junit_xml_path if junit_xml_path.exists() else None, + stdout=stdout, + stderr=stderr, + failures=failures, + ) + + +def get_junit_xml(run_result: RunResult) -> Optional[str]: + """ + Get the raw JUnit XML content from a run result. + + Args: + run_result: The run result + + Returns: + Raw XML content or None if not available + """ + if run_result.junit_xml_path and run_result.junit_xml_path.exists(): + return run_result.junit_xml_path.read_text() + return None diff --git a/app/storage/__init__.py b/app/storage/__init__.py new file mode 100644 index 00000000..052092f9 --- /dev/null +++ b/app/storage/__init__.py @@ -0,0 +1,4 @@ +# Storage package +from .db import Database, Spec, Generation, Run + +__all__ = ["Database", "Spec", "Generation", "Run"] diff --git a/app/storage/db.py b/app/storage/db.py new file mode 100644 index 00000000..3f8f5cf1 --- /dev/null +++ b/app/storage/db.py @@ -0,0 +1,429 @@ +""" +Database - SQLite storage for specs, generations, and runs. + +Simple, lightweight storage using Python's built-in sqlite3. +""" + +import json +import sqlite3 +import uuid +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional + + +def _utc_now() -> datetime: + """Get current UTC time (timezone-aware)""" + return datetime.now(timezone.utc) + + +@dataclass +class Spec: + """Stored OpenAPI specification""" + + id: str + name: str + raw_content: str + title: str + version: str + endpoint_count: int + created_at: datetime = field(default_factory=_utc_now) + + +@dataclass +class Generation: + """Test generation record""" + + id: str + spec_id: str + test_dir: str + files: list[str] + status: str = "pending" # pending, completed, failed + error: Optional[str] = None + created_at: datetime = field(default_factory=_utc_now) + + +@dataclass +class Run: + """Test run record""" + + id: str + generation_id: str + target_url: str + status: str = "pending" # pending, running, completed, failed + passed: int = 0 + failed: int = 0 + skipped: int = 0 + errors: int = 0 + total: int = 0 + duration: float = 0.0 + junit_xml_path: Optional[str] = None + results_json: Optional[str] = None + created_at: datetime = field(default_factory=_utc_now) + completed_at: Optional[datetime] = None + + +class Database: + """SQLite database for the test generator""" + + def __init__(self, db_path: Path = Path("./data/app.db")): + self.db_path = db_path + self.db_path.parent.mkdir(parents=True, exist_ok=True) + self._init_db() + + def _get_connection(self) -> sqlite3.Connection: + """Get a database connection""" + conn = sqlite3.connect(str(self.db_path)) + conn.row_factory = sqlite3.Row + return conn + + def _init_db(self): + """Initialize database schema""" + with self._get_connection() as conn: + conn.executescript(""" + CREATE TABLE IF NOT EXISTS specs ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + raw_content TEXT NOT NULL, + title TEXT NOT NULL, + version TEXT NOT NULL, + endpoint_count INTEGER NOT NULL, + created_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS generations ( + id TEXT PRIMARY KEY, + spec_id TEXT NOT NULL, + test_dir TEXT NOT NULL, + files TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + error TEXT, + created_at TEXT NOT NULL, + FOREIGN KEY (spec_id) 
REFERENCES specs(id) + ); + + CREATE TABLE IF NOT EXISTS runs ( + id TEXT PRIMARY KEY, + generation_id TEXT NOT NULL, + target_url TEXT NOT NULL, + status TEXT NOT NULL DEFAULT 'pending', + passed INTEGER DEFAULT 0, + failed INTEGER DEFAULT 0, + skipped INTEGER DEFAULT 0, + errors INTEGER DEFAULT 0, + total INTEGER DEFAULT 0, + duration REAL DEFAULT 0, + junit_xml_path TEXT, + results_json TEXT, + created_at TEXT NOT NULL, + completed_at TEXT, + FOREIGN KEY (generation_id) REFERENCES generations(id) + ); + + CREATE INDEX IF NOT EXISTS idx_generations_spec_id ON generations(spec_id); + CREATE INDEX IF NOT EXISTS idx_runs_generation_id ON runs(generation_id); + """) + + # Spec operations + def create_spec( + self, name: str, raw_content: str, title: str, version: str, endpoint_count: int + ) -> Spec: + """Create a new spec record""" + spec = Spec( + id=str(uuid.uuid4()), + name=name, + raw_content=raw_content, + title=title, + version=version, + endpoint_count=endpoint_count, + created_at=_utc_now(), + ) + + with self._get_connection() as conn: + conn.execute( + """INSERT INTO specs (id, name, raw_content, title, version, endpoint_count, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?)""", + ( + spec.id, + spec.name, + spec.raw_content, + spec.title, + spec.version, + spec.endpoint_count, + spec.created_at.isoformat(), + ), + ) + + return spec + + def get_spec(self, spec_id: str) -> Optional[Spec]: + """Get a spec by ID""" + with self._get_connection() as conn: + row = conn.execute( + "SELECT * FROM specs WHERE id = ?", (spec_id,) + ).fetchone() + + if row: + return Spec( + id=row["id"], + name=row["name"], + raw_content=row["raw_content"], + title=row["title"], + version=row["version"], + endpoint_count=row["endpoint_count"], + created_at=datetime.fromisoformat(row["created_at"]), + ) + return None + + def list_specs(self) -> list[Spec]: + """List all specs""" + with self._get_connection() as conn: + rows = conn.execute( + "SELECT * FROM specs ORDER BY created_at DESC" + ).fetchall() + + return [ + Spec( + id=row["id"], + name=row["name"], + raw_content=row["raw_content"], + title=row["title"], + version=row["version"], + endpoint_count=row["endpoint_count"], + created_at=datetime.fromisoformat(row["created_at"]), + ) + for row in rows + ] + + def delete_spec(self, spec_id: str) -> bool: + """Delete a spec by ID""" + with self._get_connection() as conn: + cursor = conn.execute("DELETE FROM specs WHERE id = ?", (spec_id,)) + return cursor.rowcount > 0 + + # Generation operations + def create_generation( + self, spec_id: str, test_dir: str, files: list[str] + ) -> Generation: + """Create a new generation record""" + generation = Generation( + id=str(uuid.uuid4()), + spec_id=spec_id, + test_dir=test_dir, + files=files, + status="completed", + created_at=_utc_now(), + ) + + with self._get_connection() as conn: + conn.execute( + """INSERT INTO generations (id, spec_id, test_dir, files, status, created_at) + VALUES (?, ?, ?, ?, ?, ?)""", + ( + generation.id, + generation.spec_id, + generation.test_dir, + json.dumps(files), + generation.status, + generation.created_at.isoformat(), + ), + ) + + return generation + + def get_generation(self, generation_id: str) -> Optional[Generation]: + """Get a generation by ID""" + with self._get_connection() as conn: + row = conn.execute( + "SELECT * FROM generations WHERE id = ?", (generation_id,) + ).fetchone() + + if row: + return Generation( + id=row["id"], + spec_id=row["spec_id"], + test_dir=row["test_dir"], + files=json.loads(row["files"]), + 
status=row["status"], + error=row["error"], + created_at=datetime.fromisoformat(row["created_at"]), + ) + return None + + def list_generations(self, spec_id: Optional[str] = None) -> list[Generation]: + """List generations, optionally filtered by spec_id""" + with self._get_connection() as conn: + if spec_id: + rows = conn.execute( + "SELECT * FROM generations WHERE spec_id = ? ORDER BY created_at DESC", + (spec_id,), + ).fetchall() + else: + rows = conn.execute( + "SELECT * FROM generations ORDER BY created_at DESC" + ).fetchall() + + return [ + Generation( + id=row["id"], + spec_id=row["spec_id"], + test_dir=row["test_dir"], + files=json.loads(row["files"]), + status=row["status"], + error=row["error"], + created_at=datetime.fromisoformat(row["created_at"]), + ) + for row in rows + ] + + def update_generation_status( + self, generation_id: str, status: str, error: Optional[str] = None + ) -> bool: + """Update generation status""" + with self._get_connection() as conn: + cursor = conn.execute( + "UPDATE generations SET status = ?, error = ? WHERE id = ?", + (status, error, generation_id), + ) + return cursor.rowcount > 0 + + # Run operations + def create_run(self, generation_id: str, target_url: str) -> Run: + """Create a new run record""" + run = Run( + id=str(uuid.uuid4()), + generation_id=generation_id, + target_url=target_url, + status="pending", + created_at=_utc_now(), + ) + + with self._get_connection() as conn: + conn.execute( + """INSERT INTO runs (id, generation_id, target_url, status, created_at) + VALUES (?, ?, ?, ?, ?)""", + ( + run.id, + run.generation_id, + run.target_url, + run.status, + run.created_at.isoformat(), + ), + ) + + return run + + def get_run(self, run_id: str) -> Optional[Run]: + """Get a run by ID""" + with self._get_connection() as conn: + row = conn.execute("SELECT * FROM runs WHERE id = ?", (run_id,)).fetchone() + + if row: + return Run( + id=row["id"], + generation_id=row["generation_id"], + target_url=row["target_url"], + status=row["status"], + passed=row["passed"], + failed=row["failed"], + skipped=row["skipped"], + errors=row["errors"], + total=row["total"], + duration=row["duration"], + junit_xml_path=row["junit_xml_path"], + results_json=row["results_json"], + created_at=datetime.fromisoformat(row["created_at"]), + completed_at=datetime.fromisoformat(row["completed_at"]) + if row["completed_at"] + else None, + ) + return None + + def list_runs(self, generation_id: Optional[str] = None) -> list[Run]: + """List runs, optionally filtered by generation_id""" + with self._get_connection() as conn: + if generation_id: + rows = conn.execute( + "SELECT * FROM runs WHERE generation_id = ? 
ORDER BY created_at DESC", + (generation_id,), + ).fetchall() + else: + rows = conn.execute( + "SELECT * FROM runs ORDER BY created_at DESC" + ).fetchall() + + return [ + Run( + id=row["id"], + generation_id=row["generation_id"], + target_url=row["target_url"], + status=row["status"], + passed=row["passed"], + failed=row["failed"], + skipped=row["skipped"], + errors=row["errors"], + total=row["total"], + duration=row["duration"], + junit_xml_path=row["junit_xml_path"], + results_json=row["results_json"], + created_at=datetime.fromisoformat(row["created_at"]), + completed_at=datetime.fromisoformat(row["completed_at"]) + if row["completed_at"] + else None, + ) + for row in rows + ] + + def update_run( + self, + run_id: str, + status: str, + passed: int = 0, + failed: int = 0, + skipped: int = 0, + errors: int = 0, + total: int = 0, + duration: float = 0.0, + junit_xml_path: Optional[str] = None, + results_json: Optional[str] = None, + ) -> bool: + """Update run with results""" + completed_at = ( + _utc_now().isoformat() if status in ("completed", "failed") else None + ) + + with self._get_connection() as conn: + cursor = conn.execute( + """UPDATE runs SET + status = ?, passed = ?, failed = ?, skipped = ?, errors = ?, + total = ?, duration = ?, junit_xml_path = ?, results_json = ?, + completed_at = ? + WHERE id = ?""", + ( + status, + passed, + failed, + skipped, + errors, + total, + duration, + junit_xml_path, + results_json, + completed_at, + run_id, + ), + ) + return cursor.rowcount > 0 + + +# Global database instance +_db: Optional[Database] = None + + +def get_database(db_path: Optional[Path] = None) -> Database: + """Get or create the global database instance""" + global _db + if _db is None or db_path is not None: + _db = Database(db_path or Path("./data/app.db")) + return _db diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..af2dca02 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,48 @@ +version: '3.8' + +services: + # FastAPI backend + api: + build: + context: . + dockerfile: Dockerfile + ports: + - "8000:8000" + volumes: + - ./data:/app/data + - ./generated_tests:/app/generated_tests + environment: + - DATABASE_PATH=/app/data/app.db + - GENERATED_TESTS_DIR=/app/generated_tests + - DEFAULT_TARGET_URL=http://example-api:8001 + depends_on: + - example-api + + # Streamlit UI + ui: + build: + context: . + dockerfile: Dockerfile.streamlit + ports: + - "8501:8501" + volumes: + - ./data:/app/data + - ./generated_tests:/app/generated_tests + environment: + - DATABASE_PATH=/app/data/app.db + - GENERATED_TESTS_DIR=/app/generated_tests + - DEFAULT_TARGET_URL=http://example-api:8001 + depends_on: + - api + + # Example target API for testing + example-api: + build: + context: . + dockerfile: Dockerfile.example + ports: + - "8001:8001" + +volumes: + data: + generated_tests: diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..b74e9990 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,106 @@ +# OpenAPI Test Generator - Documentation + +## Quick Links + +- [Main README](../README.md) - Quick start guide +- [Setup Guide](SETUP.md) - Detailed setup instructions +- [Codegen Log](codegen-log.md) - AI tool usage documentation + +## Overview + +Generate and run pytest tests from OpenAPI specifications. 
+ +### Features + +- **Upload OpenAPI Specs**: Support for YAML and JSON formats +- **Generate Tests**: Automatically create pytest files with happy path and error tests +- **Run Tests**: Execute tests against any target API +- **View Results**: Detailed pass/fail reporting with failure details +- **Two Modes**: Mock LLM (deterministic) or Real LLM (optional) + +## Architecture + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Streamlit UI │────▶│ FastAPI API │────▶│ SQLite DB │ +│ (port 8501) │ │ (port 8000) │ │ │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ + ▼ + ┌─────────────────┐ + │ Test Generator │ + │ (Mock/Real LLM)│ + └─────────────────┘ + │ + ▼ + ┌─────────────────┐ + │ Pytest Runner │────▶ Target API + └─────────────────┘ +``` + +## Project Structure + +``` +spec-driven-development/ +├── run.py # Simple runner script (start here!) +├── app/ # FastAPI backend +│ ├── main.py # Entry point +│ ├── routes.py # API endpoints +│ ├── openapi_parser.py +│ ├── generator/ # Test generation +│ ├── runner/ # Test execution +│ └── storage/ # SQLite database +├── streamlit_app/ # Streamlit UI +├── example_api/ # Target API for testing +├── tests/ # Test suite (67 tests) +├── SPECS/ # Feature specifications +├── openapi_specs/ # Sample specs +└── docs/ # Documentation +``` + +## API Endpoints + +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | /health | Health check | +| POST | /specs | Upload OpenAPI spec | +| GET | /specs | List all specs | +| GET | /specs/{id} | Get spec details | +| DELETE | /specs/{id} | Delete spec | +| POST | /generate | Generate tests | +| GET | /generations/{id} | Get generation details | +| POST | /runs | Run tests | +| GET | /runs/{id} | Get run results | +| GET | /runs/{id}/junit | Get JUnit XML | + +## Configuration + +Environment variables: + +| Variable | Default | Description | +|----------|---------|-------------| +| DATABASE_PATH | ./data/app.db | SQLite database path | +| GENERATED_TESTS_DIR | ./generated_tests | Output directory | +| DEFAULT_TARGET_URL | http://localhost:8001 | Default API URL | +| LLM_API_KEY | (none) | Enable real LLM mode | + +## Mock vs Real LLM + +**Mock Mode (default)**: +- Deterministic test generation +- No external API calls +- Ideal for CI/CD + +**Real Mode** (set LLM_API_KEY): +- Uses OpenAI/Anthropic for generation +- More varied test scenarios +- Requires API key + +## Spec-Driven Development + +This project follows spec-first development: +- Every feature has a spec in `SPECS/` +- Implementation follows the spec +- Acceptance criteria are checked off + +See [RULES.md](../RULES.md) for details. diff --git a/docs/SETUP.md b/docs/SETUP.md new file mode 100644 index 00000000..e33add2a --- /dev/null +++ b/docs/SETUP.md @@ -0,0 +1,192 @@ +# OpenAPI Test Generator - Setup & Usage Guide + +A tool that generates and runs API tests from OpenAPI specifications. 
+ +--- + +## LLM Providers + +This tool supports multiple LLM providers for test generation: + +| Provider | Description | API Key Required | +|----------|-------------|------------------| +| **Mock** (default) | Deterministic generation, no API calls | No | +| **OpenAI** | GPT-4, GPT-4o, GPT-3.5 | Yes | +| **Anthropic** | Claude 3 (Haiku, Sonnet, Opus) | Yes | + +### Setting Up API Keys + +**Option 1: Environment Variables (Recommended)** +```bash +# For OpenAI +export OPENAI_API_KEY="your-openai-key" + +# For Anthropic +export ANTHROPIC_API_KEY="your-anthropic-key" + +# Optional: Set default provider +export LLM_PROVIDER="openai" # or "anthropic" or "mock" +``` + +**Option 2: Enter in UI** +- Select provider in the Generate Tests page +- Enter your API key directly (not stored) + +--- + +## Quick Start (3 Steps) + +### Step 1: Install Python +Make sure you have **Python 3.10 or higher** installed. + +Check your version: +```bash +python --version +``` + +If you don't have Python, download it from: https://www.python.org/downloads/ + +### Step 2: Install Dependencies +Open a terminal/command prompt in this folder and run: + +```bash +python run.py setup +``` + +### Step 3: Start the Application +```bash +python run.py all +``` + +Then open your browser to: **http://localhost:8501** + +That's it! You're ready to use the application. + +--- + +## How to Use the Application + +### 1. Upload an OpenAPI Spec +- Go to the **"Upload Spec"** page in the sidebar +- Paste your OpenAPI YAML/JSON or click **"Load Example"** +- Click **"Save Spec"** + +### 2. Generate Tests +- Go to the **"Generate Tests"** page +- Select your saved spec from the dropdown +- Click **"Generate Tests"** +- View the generated test code + +### 3. Run Tests +- Go to the **"Run Tests"** page +- Select a generation to run +- Enter your target API URL (or use the default example) +- Click **"Run Tests"** +- View results: passed, failed, and failure details + +--- + +## Available Commands + +| Command | Description | +|---------|-------------| +| `python run.py setup` | Install all dependencies | +| `python run.py all` | Start all services (recommended) | +| `python run.py ui` | Start only the web UI | +| `python run.py api` | Start only the backend API | +| `python run.py example` | Start only the example target API | +| `python run.py test` | Run all tests | +| `python run.py clean` | Clean generated files | +| `python run.py help` | Show help message | + +--- + +## Service URLs + +When running `python run.py all`: + +| Service | URL | Description | +|---------|-----|-------------| +| Web UI | http://localhost:8501 | Main user interface | +| Backend API | http://localhost:8000 | FastAPI backend | +| Example API | http://localhost:8001 | Sample API for testing | + +--- + +## Troubleshooting + +### "Python not found" +- Make sure Python is installed and added to your PATH +- On Windows, try `py` instead of `python` + +### "Port already in use" +- Another application is using the port +- Stop other services or change the port in the command + +### "Module not found" +- Run `python run.py setup` to install dependencies + +### Tests failing on Windows +- This is normal for file cleanup - the tests themselves pass +- SQLite files may be locked temporarily + +--- + +## Project Structure + +``` +spec-driven-development/ +├── run.py # Simple runner script (use this!) 
+├── app/ # FastAPI backend +├── streamlit_app/ # Web UI +├── example_api/ # Sample API for testing +├── tests/ # Test suite +├── openapi_specs/ # Example OpenAPI specs +└── requirements.txt # Python dependencies +``` + +--- + +## For Developers + +### Running Individual Services + +If you prefer to run services separately: + +**Terminal 1 - Backend API:** +```bash +python run.py api +``` + +**Terminal 2 - Example API:** +```bash +python run.py example +``` + +**Terminal 3 - Web UI:** +```bash +python run.py ui +``` + +### Running Tests + +```bash +python run.py test +``` + +### Using Make (Mac/Linux only) + +If you have `make` installed: +```bash +make install # Install dependencies +make dev # Run backend +make ui # Run UI +make test # Run tests +``` + +--- + +## Need Help? + +- Check the API docs: http://localhost:8000/docs (when API is running) +- Review the example spec in `openapi_specs/example_openapi.yaml` diff --git a/docs/WALKTHROUGH.md b/docs/WALKTHROUGH.md new file mode 100644 index 00000000..b2784b46 --- /dev/null +++ b/docs/WALKTHROUGH.md @@ -0,0 +1,667 @@ +# OpenAPI Test Generator - Complete Walkthrough + +Hey there! This guide will walk you through using the OpenAPI Test Generator from scratch. Whether you're a developer or just someone who wants to automate API testing, this doc has got you covered. + +## Table of Contents + +- [What Does This Thing Do?](#what-does-this-thing-do) +- [Before You Start](#before-you-start) +- [Starting Up the Services](#starting-up-the-services) +- [Using the Web Interface](#using-the-web-interface) + - [Page 1: Upload Spec](#page-1-upload-spec) + - [Page 2: Generate Tests](#page-2-generate-tests) + - [Page 3: Run Tests](#page-3-run-tests) +- [Understanding What the Tests Actually Do](#understanding-what-the-tests-actually-do) +- [The Example API Endpoints](#the-example-api-endpoints) +- [Troubleshooting](#troubleshooting) +- [CLI Usage (No UI Needed)](#cli-usage-no-ui-needed) +- [API Endpoints Reference](#api-endpoints-reference) +- [Environment Variables](#environment-variables) +- [Running Tests Directly with pytest](#running-tests-directly-with-pytest) +- [One-Liner Workflow (Advanced)](#one-liner-workflow-advanced) +- [Project Structure](#project-structure-for-the-curious) +- [Quick Reference](#quick-reference) + +## What Does This Thing Do? + +In simple terms: you give it an OpenAPI spec (that YAML/JSON file describing your API), and it spits out working pytest tests. Then you can run those tests against any API server to see if things work as expected. + +The whole flow looks like this: + +``` +OpenAPI Spec (YAML/JSON) + | + v + [Upload to App] + | + v + [Generate Tests] <-- Uses Mock LLM or Real LLM (OpenAI/Anthropic) + | + v + [Run Tests] + | + v + Results (Pass/Fail with details) +``` + +--- + +## Before You Start + +### What You Need Installed + +1. **Python 3.10+** - Check with `python --version` +2. **pip** - Comes with Python, but verify with `pip --version` + +### Getting the Code Ready + +Open your terminal and navigate to the project folder: + +```bash +cd c:\Users\charan4170\ai-test-project\spec-driven-development +``` + +Install all the dependencies: + +```bash +pip install -r requirements.txt +``` + +This grabs everything you need - FastAPI, Streamlit, pytest, httpx, and all the other bits. + +--- + +## Starting Up the Services + +You'll need **two terminals** open for this. Think of it like having two tabs - one runs your test API, the other runs the web interface. 
+ +### Terminal 1: Start the Example API + +This is a simple API server that we'll test against. It's got a few endpoints like creating items, fetching items, etc. + +```bash +cd c:\Users\charan4170\ai-test-project\spec-driven-development +python run.py example +``` + +You should see something like: + +``` +INFO: Uvicorn running on http://127.0.0.1:8001 +INFO: Started reloader process +``` + +Leave this running. Don't close this terminal! + +### Terminal 2: Start the Streamlit UI + +Open a new terminal window (keep the first one running) and do: + +```bash +cd c:\Users\charan4170\ai-test-project\spec-driven-development +python run.py ui +``` + +You'll see: + +``` + You can now view your Streamlit app in your browser. + Local URL: http://localhost:8501 +``` + +Now open your browser and go to **http://localhost:8501** + +--- + +## Using the Web Interface + +The Streamlit UI has three main pages. You'll go through them in order: Upload → Generate → Run. + +### Page 1: Upload Spec + +This is where you feed the app your OpenAPI specification. + +**Option A: Use the Built-in Example (Easiest)** + +1. Click "Load Example Spec" button +2. You'll see the spec appear in the text area +3. The right side shows a preview of all the endpoints found +4. Give it a name (or keep the default) +5. Click "Save Spec" + +**Option B: Paste Your Own Spec** + +1. Copy your OpenAPI YAML or JSON content +2. Paste it into the big text area on the left +3. Check the preview on the right - make sure it parsed correctly +4. Enter a name for this spec +5. Click "Save Spec" + +**Option C: Upload a File** + +1. Use the file uploader at the top +2. Select your `.yaml` or `.json` file +3. Preview shows up on the right +4. Name it and save + +After saving, you'll see a green success message with a Spec ID. The app remembers this spec for later. + +### Page 2: Generate Tests + +Click on "Generate Tests" in the sidebar to go to this page. + +**Step 1: Pick Your Spec** + +Use the dropdown at the top to select the spec you just uploaded. If you only have one, it's already selected. + +**Step 2: Choose Your LLM Provider** + +This is where it gets interesting. You have three options: + +| Provider | What It Does | Needs API Key? | +|----------|--------------|----------------| +| **Mock (Deterministic)** | Generates predictable, reliable tests based on your spec structure | No | +| **OpenAI** | Uses GPT to generate more creative tests | Yes - needs `OPENAI_API_KEY` | +| **Anthropic** | Uses Claude to generate tests | Yes - needs `ANTHROPIC_API_KEY` | + +**My recommendation:** Start with **Mock**. It's free, fast, and generates tests that actually work. The real LLMs can sometimes make wrong assumptions about your API. + +If you want to use OpenAI or Anthropic: +1. Select the provider from the dropdown +2. Paste your API key in the text field that appears +3. The key is only used for this session - it's not stored anywhere + +**Step 3: Generate!** + +Click the big "Generate Tests" button. You'll see a progress spinner, and then: + +- A success message with the Generation ID +- A list of generated test files +- Expandable sections showing the actual test code + +Take a look at the generated code if you're curious. Each test file contains pytest functions that make HTTP requests to your API and check the responses. + +### Page 3: Run Tests + +Click "Run Tests" in the sidebar. + +**Step 1: Select Generation** + +Pick the test generation you want to run from the dropdown. It shows the spec name and when it was generated. 
+ +**Step 2: Set Target URL** + +This is the URL of the API you're testing against. + +- If you started the example API earlier, keep the default: `http://localhost:8001` +- If you're testing a different API, enter its URL here + +**Step 3: Run!** + +Click "Run Tests" and wait. The tests will execute, and you'll see: + +**Results Summary** + +Five boxes showing: +- **Passed** (green) - Tests that worked +- **Failed** (red) - Tests that didn't work +- **Skipped** - Tests that were skipped +- **Errors** - Tests that crashed +- **Duration** - How long it took + +**If Everything Passed:** + +You'll see a green "All tests passed!" message. Nice work! + +**If Something Failed:** + +Each failure shows up in an expandable section with: +- The test name +- What went wrong (error message) +- The full traceback (useful for debugging) + +**Test Output** + +Expand "Test Output" to see the raw stdout/stderr from pytest. This is helpful when things go sideways. + +**Download Options** + +- **JUnit XML** - Standard test report format, works with CI tools like Jenkins +- **Results JSON** - All the details in JSON format + +--- + +## Understanding What the Tests Actually Do + +The generated tests cover three scenarios for each endpoint: + +### 1. Happy Path Tests (`*_success`) + +These test that your API works when given valid input: + +```python +def test_create_item_success(client): + """Test successful POST /items""" + response = client.post("/items", json={"name": "Widget", "price": 9.99}) + assert response.status_code == 201 + assert response.json() is not None +``` + +### 2. Not Found Tests (`*_not_found`) + +These test that your API returns 404 for missing resources: + +```python +def test_get_item_not_found(client): + """Test GET /items/{id} with non-existent resource""" + response = client.get("/items/nonexistent-id-12345") + assert response.status_code == 404 +``` + +### 3. Validation Error Tests (`*_validation_error`) + +These test that your API rejects bad input: + +```python +def test_create_item_validation_error(client): + """Test POST /items with missing required fields""" + response = client.post("/items", json={}) + assert response.status_code == 422 +``` + +--- + +## The Example API Endpoints + +If you're using the built-in example API, here's what it supports: + +| Method | Path | What It Does | +|--------|------|--------------| +| GET | `/health` | Returns `{"status": "ok"}` | +| POST | `/items` | Creates a new item with `name` and `price` | +| GET | `/items/{id}` | Gets an item by ID | +| DELETE | `/items/{id}` | Deletes an item by ID | + +The API stores items in memory, so they disappear when you restart it. + +--- + +## Troubleshooting + +### "collected 0 items" - No Tests Found + +This usually means the test files weren't generated properly. Try: +1. Go back to "Generate Tests" page +2. Generate tests again +3. Make sure you see the test files listed + +### Tests Fail with Connection Errors + +The target API isn't running or the URL is wrong: +1. Check that Terminal 1 (example API) is still running +2. Verify the URL is `http://localhost:8001` +3. Try opening that URL in your browser - you should see `{"detail":"Not Found"}` + +### OpenAI/Anthropic Tests Fail + +Real LLMs sometimes generate tests with wrong assumptions. Common issues: +- Expecting `error` field instead of `detail` +- Assuming items already exist +- Wrong status codes + +**Solution:** Use Mock mode instead. It generates tests that match FastAPI conventions. 
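+
+For instance, a test that matches FastAPI conventions asserts on the `detail`
+field (an illustrative sketch using the generated `client` fixture):
+
+```python
+def test_get_item_not_found(client):
+    response = client.get("/items/nonexistent-id-12345")
+    assert response.status_code == 404
+    assert "detail" in response.json()  # FastAPI uses "detail", not "error"
+```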
+ +### "openai package is required" Error + +You selected OpenAI but don't have the package: +```bash +pip install openai +``` + +Same for Anthropic: +```bash +pip install anthropic +``` + +### Database Issues + +If things get weird, you can reset the database: +```bash +del data\app.db +``` + +Then restart the UI. + +--- + +## CLI Usage (No UI Needed) + +Prefer the command line? Here's how to do everything without touching the browser. + +### Step 1: Start the Backend API + +```bash +cd c:\Users\charan4170\ai-test-project\spec-driven-development +python run.py api +``` + +This starts the FastAPI backend on `http://localhost:8000`. Keep this terminal open. + +### Step 2: Start the Example API (for testing) + +Open another terminal: + +```bash +cd c:\Users\charan4170\ai-test-project\spec-driven-development +python run.py example +``` + +Now you have the target API on `http://localhost:8001`. + +### Step 3: Upload a Spec + +Using `curl` (or any HTTP client): + +```bash +# Upload the example spec +curl -X POST http://localhost:8000/specs \ + -H "Content-Type: application/json" \ + -d "{\"name\": \"My API\", \"content\": \"$(cat openapi_specs/example_openapi.yaml)\"}" +``` + +Or with PowerShell: + +```powershell +$spec = Get-Content -Raw openapi_specs\example_openapi.yaml +$body = @{ name = "My API"; content = $spec } | ConvertTo-Json +Invoke-RestMethod -Uri http://localhost:8000/specs -Method POST -Body $body -ContentType "application/json" +``` + +Response: +```json +{ + "id": "abc123-def456-...", + "name": "My API", + "title": "Example API", + "version": "1.0.0", + "endpoint_count": 4, + "created_at": "2024-01-15T10:30:00" +} +``` + +Save that `id` - you'll need it. + +### Step 4: Generate Tests + +```bash +curl -X POST http://localhost:8000/generate \ + -H "Content-Type: application/json" \ + -d "{\"spec_id\": \"YOUR_SPEC_ID_HERE\"}" +``` + +PowerShell: +```powershell +$body = @{ spec_id = "YOUR_SPEC_ID_HERE" } | ConvertTo-Json +Invoke-RestMethod -Uri http://localhost:8000/generate -Method POST -Body $body -ContentType "application/json" +``` + +Response: +```json +{ + "id": "gen-789...", + "spec_id": "abc123...", + "status": "completed", + "files": [ + "C:\\...\\generated_tests\\abc123...\\conftest.py", + "C:\\...\\generated_tests\\abc123...\\test_items.py" + ], + "created_at": "2024-01-15T10:31:00" +} +``` + +### Step 5: Run Tests + +```bash +curl -X POST http://localhost:8000/runs \ + -H "Content-Type: application/json" \ + -d "{\"generation_id\": \"YOUR_GENERATION_ID\", \"target_url\": \"http://localhost:8001\"}" +``` + +PowerShell: +```powershell +$body = @{ generation_id = "YOUR_GENERATION_ID"; target_url = "http://localhost:8001" } | ConvertTo-Json +Invoke-RestMethod -Uri http://localhost:8000/runs -Method POST -Body $body -ContentType "application/json" +``` + +Response: +```json +{ + "id": "run-999...", + "generation_id": "gen-789...", + "status": "completed", + "passed": 6, + "failed": 0, + "skipped": 0, + "errors": 0, + "total": 6, + "duration": 1.23, + "created_at": "2024-01-15T10:32:00" +} +``` + +### Step 6: Get Detailed Results + +```bash +curl http://localhost:8000/runs/YOUR_RUN_ID +``` + +This returns the full details including any failure messages and tracebacks. 
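+
+The response shape mirrors the summary above, plus a `failures` array
+(field values below are illustrative):
+
+```json
+{
+  "id": "run-999...",
+  "generation_id": "gen-789...",
+  "status": "completed",
+  "passed": 5,
+  "failed": 1,
+  "skipped": 0,
+  "errors": 0,
+  "total": 6,
+  "duration": 1.41,
+  "created_at": "2024-01-15T10:32:00",
+  "completed_at": "2024-01-15T10:32:02",
+  "failures": [
+    {
+      "name": "test_create_item_validation_error",
+      "classname": "test_items",
+      "message": "assert 201 == 422",
+      "traceback": "..."
+    }
+  ]
+}
+```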
+ +### Step 7: Get JUnit XML (for CI) + +```bash +curl http://localhost:8000/runs/YOUR_RUN_ID/junit > results.xml +``` + +--- + +## API Endpoints Reference + +All endpoints are available at `http://localhost:8000` + +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/health` | Health check - returns `{"status": "ok"}` | +| POST | `/specs` | Upload a new OpenAPI spec | +| GET | `/specs` | List all uploaded specs | +| GET | `/specs/{id}` | Get spec details with endpoints | +| DELETE | `/specs/{id}` | Delete a spec | +| POST | `/generate` | Generate tests for a spec | +| GET | `/generations` | List all generations | +| GET | `/generations/{id}` | Get generation details | +| POST | `/runs` | Run tests for a generation | +| GET | `/runs` | List all runs | +| GET | `/runs/{id}` | Get run details with failures | +| GET | `/runs/{id}/junit` | Get JUnit XML report | + +--- + +## Environment Variables + +You can configure the app using these environment variables: + +| Variable | Default | Description | +|----------|---------|-------------| +| `LLM_PROVIDER` | `mock` | Which LLM to use: `mock`, `openai`, or `anthropic` | +| `OPENAI_API_KEY` | - | Your OpenAI API key | +| `ANTHROPIC_API_KEY` | - | Your Anthropic API key | +| `DEFAULT_TARGET_URL` | `http://localhost:8001` | Default URL for running tests | +| `DATABASE_PATH` | `data/app.db` | Where to store the SQLite database | +| `GENERATED_TESTS_DIR` | `generated_tests` | Where to write generated test files | +| `TEST_TIMEOUT` | `300` | Max seconds for test execution | + +Example: +```bash +# Windows +set LLM_PROVIDER=openai +set OPENAI_API_KEY=sk-your-key-here +python run.py api + +# Linux/Mac +export LLM_PROVIDER=openai +export OPENAI_API_KEY=sk-your-key-here +python run.py api +``` + +--- + +## Running Tests Directly with pytest + +Don't want to use the API at all? You can run generated tests directly: + +```bash +cd c:\Users\charan4170\ai-test-project\spec-driven-development + +# Run the project's own tests (to verify everything works) +python -m pytest tests/ -v + +# Run generated tests against your API +set TARGET_BASE_URL=http://localhost:8001 +python -m pytest generated_tests\\ -v + +# Run with JUnit output for CI +python -m pytest generated_tests\\ --junitxml=results.xml -v +``` + +--- + +## One-Liner Workflow (Advanced) + +For scripting or CI pipelines, here's a complete workflow in PowerShell: + +```powershell +# Variables +$API_URL = "http://localhost:8000" +$TARGET_URL = "http://localhost:8001" +$SPEC_FILE = "openapi_specs\example_openapi.yaml" + +# 1. Upload spec +$spec = Get-Content -Raw $SPEC_FILE +$uploadBody = @{ name = "CI Test"; content = $spec } | ConvertTo-Json -Depth 10 +$specResult = Invoke-RestMethod -Uri "$API_URL/specs" -Method POST -Body $uploadBody -ContentType "application/json" +$specId = $specResult.id +Write-Host "Uploaded spec: $specId" + +# 2. Generate tests +$genBody = @{ spec_id = $specId } | ConvertTo-Json +$genResult = Invoke-RestMethod -Uri "$API_URL/generate" -Method POST -Body $genBody -ContentType "application/json" +$genId = $genResult.id +Write-Host "Generated tests: $genId" + +# 3. Run tests +$runBody = @{ generation_id = $genId; target_url = $TARGET_URL } | ConvertTo-Json +$runResult = Invoke-RestMethod -Uri "$API_URL/runs" -Method POST -Body $runBody -ContentType "application/json" + +# 4. Check results +if ($runResult.failed -eq 0 -and $runResult.errors -eq 0) { + Write-Host "All tests passed! 
($($runResult.passed) passed)" + exit 0 +} else { + Write-Host "Tests failed: $($runResult.failed) failed, $($runResult.errors) errors" + exit 1 +} +``` + +And for bash: + +```bash +#!/bin/bash +API_URL="http://localhost:8000" +TARGET_URL="http://localhost:8001" +SPEC_FILE="openapi_specs/example_openapi.yaml" + +# 1. Upload spec +SPEC_CONTENT=$(cat "$SPEC_FILE" | jq -Rs .) +SPEC_RESULT=$(curl -s -X POST "$API_URL/specs" \ + -H "Content-Type: application/json" \ + -d "{\"name\": \"CI Test\", \"content\": $SPEC_CONTENT}") +SPEC_ID=$(echo $SPEC_RESULT | jq -r '.id') +echo "Uploaded spec: $SPEC_ID" + +# 2. Generate tests +GEN_RESULT=$(curl -s -X POST "$API_URL/generate" \ + -H "Content-Type: application/json" \ + -d "{\"spec_id\": \"$SPEC_ID\"}") +GEN_ID=$(echo $GEN_RESULT | jq -r '.id') +echo "Generated tests: $GEN_ID" + +# 3. Run tests +RUN_RESULT=$(curl -s -X POST "$API_URL/runs" \ + -H "Content-Type: application/json" \ + -d "{\"generation_id\": \"$GEN_ID\", \"target_url\": \"$TARGET_URL\"}") + +# 4. Check results +FAILED=$(echo $RUN_RESULT | jq -r '.failed') +ERRORS=$(echo $RUN_RESULT | jq -r '.errors') +PASSED=$(echo $RUN_RESULT | jq -r '.passed') + +if [ "$FAILED" -eq 0 ] && [ "$ERRORS" -eq 0 ]; then + echo "All tests passed! ($PASSED passed)" + exit 0 +else + echo "Tests failed: $FAILED failed, $ERRORS errors" + exit 1 +fi +``` + +--- + +## Project Structure (For the Curious) + +``` +spec-driven-development/ +├── app/ # Main application code +│ ├── generator/ # Test generation logic +│ │ ├── llm_client.py # Mock, OpenAI, Anthropic clients +│ │ └── test_generator.py # Generates pytest files +│ ├── runner/ # Test execution +│ │ ├── pytest_runner.py # Runs pytest programmatically +│ │ └── junit_parser.py # Parses test results +│ ├── storage/ # Database +│ │ └── db.py # SQLite storage +│ ├── openapi_parser.py # Parses OpenAPI specs +│ ├── routes.py # FastAPI endpoints +│ └── config.py # Configuration +├── streamlit_app/ # Web UI +│ ├── app.py # Main entry +│ └── pages/ # UI pages +├── example_api/ # Sample API for testing +├── generated_tests/ # Where tests get written +├── data/ # SQLite database +├── tests/ # Project's own tests +└── run.py # Helper script to start things +``` + +--- + +## Quick Reference + +| Command | What It Does | +|---------|--------------| +| `python run.py example` | Start the example API on port 8001 | +| `python run.py ui` | Start the Streamlit UI on port 8501 | +| `python run.py api` | Start the FastAPI backend on port 8000 | +| `python run.py test` | Run the project's own test suite | + +--- + +## Need Help? + +If you run into issues: + +1. Check the terminal windows for error messages +2. Look at the "Test Output" section in the Run page +3. Try regenerating tests with Mock mode +4. Reset the database if things get stuck + +Happy testing! diff --git a/docs/codegen-log.md b/docs/codegen-log.md new file mode 100644 index 00000000..b4b98e76 --- /dev/null +++ b/docs/codegen-log.md @@ -0,0 +1,116 @@ +# Code Generation Log + +This document tracks all AI-assisted code generation used in this project. + +## Tools Used + +- **Claude Code** (Anthropic Claude Opus 4.5) - Primary code generation tool + +## Generation Sessions + +### Session 1: Project Structure and Core Implementation + +**Date:** 2026-02-06 + +**Tool:** Claude Code (Claude Opus 4.5) + +**What was generated:** + +1. **Feature Specifications** (`SPECS/*.md`) + - All 6 feature specs created from requirements + - Includes acceptance criteria for each feature + +2. 
**Example API** (`example_api/main.py`) + - FastAPI application with CRUD endpoints + - In-memory storage for items + +3. **OpenAPI Parser** (`app/openapi_parser.py`) + - YAML/JSON parsing + - Schema normalization + - Reference resolution + +4. **Test Generator** (`app/generator/`) + - LLMClient interface and MockLLMClient + - Test plan generation + - Pytest file compilation + +5. **Test Runner** (`app/runner/`) + - Pytest execution wrapper + - JUnit XML parser + - Result aggregation + +6. **Storage Layer** (`app/storage/db.py`) + - SQLite database schema + - Repository functions for specs, generations, runs + +7. **FastAPI Routes** (`app/routes.py`) + - REST API endpoints + - Request/response models + +8. **Streamlit UI** (`streamlit_app/`) + - Multi-page application + - Upload, generate, run pages + +9. **Tests** (`tests/`) + - Unit tests for parser, generator, junit_parser + - Integration tests for API endpoints + - E2E workflow tests + +10. **Build Configuration** + - Makefile + - Dockerfiles (API, Streamlit, Example) + - docker-compose.yml + - GitHub Actions CI + +**Validation/Refinement:** + +- All generated code was reviewed for: + - Syntax validity (Python AST parsing) + - Consistent coding style + - Proper error handling + - Documentation coverage + +- Test coverage verified through: + - Unit tests for core components + - Integration tests for API + - E2E tests for full workflow + +## Prompts Used + +### Initial Planning +``` +Create a FastAPI service that: +1. Ingests OpenAPI specs +2. Generates pytest tests (LLM-assisted) +3. Runs tests against a target API +4. Returns results with pass/fail, failures, logs + +Key requirements: +- Mock mode (deterministic) for CI +- Real LLM mode (optional) +- SQLite storage +- Streamlit UI +``` + +### Implementation +Each component was implemented following the spec-first workflow: +1. Write feature spec in SPECS/ +2. Implement according to spec +3. Add unit tests +4. Verify acceptance criteria + +## Key Decisions Made by AI + +1. **Mock-first approach**: MockLLMClient generates deterministic tests based on endpoint structure, not random generation + +2. **Template-based generation**: Tests are generated from structured test plans, not raw LLM output, ensuring consistency + +3. **Separation of concerns**: Clear boundaries between parser, generator, runner, and storage + +4. **Sync execution**: Tests run synchronously to simplify the architecture (no background workers) + +## Files NOT Generated by AI + +- `RULES.md` - Project rules (pre-existing) +- `TODO.md` - Task tracking (pre-existing) +- `openapi_specs/example_openapi.yaml` - Hand-crafted to match example API diff --git a/example_api/__init__.py b/example_api/__init__.py new file mode 100644 index 00000000..76840b59 --- /dev/null +++ b/example_api/__init__.py @@ -0,0 +1 @@ +# Example API package diff --git a/example_api/main.py b/example_api/main.py new file mode 100644 index 00000000..60447e2a --- /dev/null +++ b/example_api/main.py @@ -0,0 +1,100 @@ +""" +Example Target API for testing the OpenAPI Test Generator. + +This is a simple FastAPI application with basic CRUD operations +that serves as the target for generated tests. 
+""" + +from fastapi import FastAPI, HTTPException, status +from pydantic import BaseModel +from typing import Optional +import uuid + +app = FastAPI( + title="Example Items API", + description="A simple API for managing items - used as a test target", + version="1.0.0", +) + +# In-memory storage +items_db: dict[str, dict] = {} + + +class ItemCreate(BaseModel): + """Request body for creating an item""" + + name: str + price: float + description: Optional[str] = None + + +class Item(BaseModel): + """Item response model""" + + id: str + name: str + price: float + description: Optional[str] = None + + +class HealthResponse(BaseModel): + """Health check response""" + + status: str + + +@app.get("/health", response_model=HealthResponse, tags=["health"]) +def health_check(): + """Check if the API is running""" + return {"status": "ok"} + + +@app.post( + "/items", response_model=Item, status_code=status.HTTP_201_CREATED, tags=["items"] +) +def create_item(item: ItemCreate): + """Create a new item""" + item_id = str(uuid.uuid4()) + new_item = { + "id": item_id, + "name": item.name, + "price": item.price, + "description": item.description, + } + items_db[item_id] = new_item + return new_item + + +@app.get("/items", response_model=list[Item], tags=["items"]) +def list_items(): + """Get all items""" + return list(items_db.values()) + + +@app.get("/items/{item_id}", response_model=Item, tags=["items"]) +def get_item(item_id: str): + """Get a specific item by ID""" + if item_id not in items_db: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Item with id '{item_id}' not found", + ) + return items_db[item_id] + + +@app.delete("/items/{item_id}", status_code=status.HTTP_204_NO_CONTENT, tags=["items"]) +def delete_item(item_id: str): + """Delete an item by ID""" + if item_id not in items_db: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Item with id '{item_id}' not found", + ) + del items_db[item_id] + return None + + +if __name__ == "__main__": + import uvicorn + + uvicorn.run(app, host="0.0.0.0", port=8001) diff --git a/openapi_specs/example_openapi.yaml b/openapi_specs/example_openapi.yaml new file mode 100644 index 00000000..219f3f0a --- /dev/null +++ b/openapi_specs/example_openapi.yaml @@ -0,0 +1,206 @@ +openapi: "3.0.3" +info: + title: Example Items API + description: A simple API for managing items - used as a test target + version: "1.0.0" +servers: + - url: http://localhost:8001 + description: Local development server +paths: + /health: + get: + summary: Check if the API is running + operationId: health_check + tags: + - health + responses: + "200": + description: Successful response + content: + application/json: + schema: + $ref: "#/components/schemas/HealthResponse" + example: + status: ok + + /items: + get: + summary: Get all items + operationId: list_items + tags: + - items + responses: + "200": + description: List of all items + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/Item" + example: + - id: "123e4567-e89b-12d3-a456-426614174000" + name: "Test Item" + price: 19.99 + description: "A test item" + + post: + summary: Create a new item + operationId: create_item + tags: + - items + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/ItemCreate" + example: + name: "New Item" + price: 29.99 + description: "A new item" + responses: + "201": + description: Item created successfully + content: + application/json: + schema: + $ref: 
"#/components/schemas/Item" + "422": + description: Validation error + content: + application/json: + schema: + $ref: "#/components/schemas/HTTPValidationError" + + /items/{item_id}: + get: + summary: Get a specific item by ID + operationId: get_item + tags: + - items + parameters: + - name: item_id + in: path + required: true + schema: + type: string + description: The ID of the item to retrieve + responses: + "200": + description: Item found + content: + application/json: + schema: + $ref: "#/components/schemas/Item" + "404": + description: Item not found + content: + application/json: + schema: + $ref: "#/components/schemas/HTTPError" + + delete: + summary: Delete an item by ID + operationId: delete_item + tags: + - items + parameters: + - name: item_id + in: path + required: true + schema: + type: string + description: The ID of the item to delete + responses: + "204": + description: Item deleted successfully + "404": + description: Item not found + content: + application/json: + schema: + $ref: "#/components/schemas/HTTPError" + +components: + schemas: + HealthResponse: + type: object + required: + - status + properties: + status: + type: string + example: ok + + ItemCreate: + type: object + required: + - name + - price + properties: + name: + type: string + example: "Test Item" + price: + type: number + format: float + example: 19.99 + description: + type: string + nullable: true + example: "A test item" + + Item: + type: object + required: + - id + - name + - price + properties: + id: + type: string + format: uuid + example: "123e4567-e89b-12d3-a456-426614174000" + name: + type: string + example: "Test Item" + price: + type: number + format: float + example: 19.99 + description: + type: string + nullable: true + example: "A test item" + + HTTPError: + type: object + properties: + detail: + type: string + example: "Item not found" + + HTTPValidationError: + type: object + properties: + detail: + type: array + items: + $ref: "#/components/schemas/ValidationError" + + ValidationError: + type: object + required: + - loc + - msg + - type + properties: + loc: + type: array + items: + type: string + msg: + type: string + type: + type: string diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..7b22a780 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,21 @@ +# Core dependencies +fastapi>=0.104.0 +openai>=1.0.0 +uvicorn[standard]>=0.24.0 +pydantic>=2.5.0 +httpx>=0.25.0 +pyyaml>=6.0 + +# Streamlit UI +streamlit>=1.28.0 + +# LLM Providers (optional - install if using real LLM mode) +openai>=1.0.0 +anthropic>=0.18.0 + +# Testing +pytest>=7.4.0 +pytest-asyncio>=0.21.0 + +# Development +ruff>=0.1.0 diff --git a/run.py b/run.py new file mode 100644 index 00000000..b55f5f5d --- /dev/null +++ b/run.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +""" +Simple runner script for the OpenAPI Test Generator. +Works on both Windows and Mac/Linux. 
+
+Usage:
+    python run.py setup    - Install dependencies
+    python run.py api      - Start the FastAPI backend (port 8000)
+    python run.py ui       - Start the Streamlit UI (port 8501)
+    python run.py example  - Start the example API (port 8001)
+    python run.py all      - Start all services (API + UI + Example)
+    python run.py test     - Run all tests
+    python run.py clean    - Clean generated files
+"""
+
+import subprocess
+import sys
+import os
+import shutil
+import time
+from pathlib import Path
+
+# Change to script directory
+os.chdir(Path(__file__).parent)
+
+
+def run_command(cmd, description=None):
+    """Run a command and print output."""
+    if description:
+        print(f"\n{'='*60}")
+        print(f" {description}")
+        print(f"{'='*60}\n")
+
+    # shell=True handles the simple commands used here consistently
+    # on both Windows and POSIX systems
+    result = subprocess.run(cmd, shell=True)
+
+    return result.returncode == 0
+
+
+def setup():
+    """Install all dependencies."""
+    print("\n" + "="*60)
+    print(" Setting up OpenAPI Test Generator")
+    print("="*60 + "\n")
+
+    # Check Python version
+    if sys.version_info < (3, 10):
+        print("ERROR: Python 3.10 or higher is required")
+        print(f"Current version: {sys.version}")
+        return False
+
+    print(f"Python version: {sys.version}")
+    print("\nInstalling dependencies...")
+
+    success = run_command(f"{sys.executable} -m pip install -r requirements.txt")
+
+    if success:
+        print("\n" + "="*60)
+        print(" Setup complete!")
+        print("="*60)
+        print("\nNext steps:")
+        print(" 1. Start the API: python run.py api")
+        print(" 2. Start the UI: python run.py ui")
+        print(" 3. Or start all: python run.py all")
+        print("\nThe UI will be available at: http://localhost:8501")
+    else:
+        print("\nSetup failed. Please check the error messages above.")
+
+    return success
+
+
+def start_api():
+    """Start the FastAPI backend."""
+    print("\nStarting FastAPI backend on http://localhost:8000 ...")
+    print("Press Ctrl+C to stop\n")
+    run_command(f"{sys.executable} -m uvicorn app.main:app --reload --port 8000")
+
+
+def start_ui():
+    """Start the Streamlit UI."""
+    print("\nStarting Streamlit UI on http://localhost:8501 ...")
+    print("Press Ctrl+C to stop\n")
+    run_command(f"{sys.executable} -m streamlit run streamlit_app/app.py --server.port 8501")
+
+
+def start_example():
+    """Start the example target API."""
+    print("\nStarting Example API on http://localhost:8001 ...")
+    print("Press Ctrl+C to stop\n")
+    run_command(f"{sys.executable} -m uvicorn example_api.main:app --reload --port 8001")
+
+
+def start_all():
+    """Start all services in separate processes."""
+    print("\n" + "="*60)
+    print(" Starting All Services")
+    print("="*60)
+    print("\nThis will start:")
+    print(" - FastAPI Backend: http://localhost:8000")
+    print(" - Streamlit UI: http://localhost:8501")
+    print(" - Example API: http://localhost:8001")
+    print("\nPress Ctrl+C to stop all services\n")
+
+    processes = []
+
+    try:
+        # Start API
+        api_proc = subprocess.Popen(
+            [sys.executable, "-m", "uvicorn", "app.main:app", "--port", "8000"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT
+        )
+        processes.append(("API", api_proc))
+        print("Started: FastAPI Backend (port 8000)")
+
+        # Start Example API
+        example_proc = subprocess.Popen(
+            [sys.executable, "-m", "uvicorn", "example_api.main:app", "--port", "8001"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT
+        )
+        processes.append(("Example", example_proc))
+        print("Started: Example API (port 8001)")
+
+        # Wait a moment for APIs to start
+        time.sleep(2)
+
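+        # NOTE: the fixed sleep above is a simple readiness heuristic; polling
+        # each service's /health endpoint until it responds (as the e2e test
+        # fixture does) would be a more robust wait.
+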
+ # Start Streamlit (this one we let output to console) + ui_proc = subprocess.Popen( + [sys.executable, "-m", "streamlit", "run", "streamlit_app/app.py", "--server.port", "8501"], + ) + processes.append(("UI", ui_proc)) + print("Started: Streamlit UI (port 8501)") + + print("\n" + "="*60) + print(" All services running!") + print(" Open http://localhost:8501 in your browser") + print("="*60 + "\n") + + # Wait for UI process (main one) + ui_proc.wait() + + except KeyboardInterrupt: + print("\n\nStopping all services...") + finally: + for name, proc in processes: + proc.terminate() + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + proc.kill() + print(f"Stopped: {name}") + print("All services stopped.") + + +def run_tests(): + """Run all tests.""" + print("\n" + "="*60) + print(" Running Tests") + print("="*60 + "\n") + + # Create test-results directory + Path("test-results").mkdir(exist_ok=True) + + success = run_command( + f"{sys.executable} -m pytest tests/ -v --tb=short" + ) + + if success: + print("\n" + "="*60) + print(" All tests passed!") + print("="*60) + else: + print("\n" + "="*60) + print(" Some tests failed. Check output above.") + print("="*60) + + return success + + +def clean(): + """Clean generated files.""" + print("\nCleaning generated files...") + + dirs_to_clean = [ + "generated_tests", + "data", + "test-results", + ".pytest_cache", + "__pycache__", + ] + + for dir_name in dirs_to_clean: + dir_path = Path(dir_name) + if dir_path.exists(): + shutil.rmtree(dir_path, ignore_errors=True) + print(f" Removed: {dir_name}/") + + # Clean __pycache__ in subdirectories + for pycache in Path(".").rglob("__pycache__"): + shutil.rmtree(pycache, ignore_errors=True) + + print("\nClean complete!") + + +def show_help(): + """Show help message.""" + print(""" +============================================================ + OpenAPI Test Generator - Quick Start +============================================================ + +COMMANDS: + python run.py setup Install dependencies (run this first!) + python run.py all Start all services (recommended) + python run.py api Start only the FastAPI backend + python run.py ui Start only the Streamlit UI + python run.py example Start only the example target API + python run.py test Run all tests + python run.py clean Clean generated files + +QUICK START: + 1. python run.py setup (install dependencies) + 2. python run.py all (start everything) + 3. Open http://localhost:8501 in your browser + +PORTS: + - Streamlit UI: http://localhost:8501 + - FastAPI Backend: http://localhost:8000 + - Example API: http://localhost:8001 +""") + + +def main(): + if len(sys.argv) < 2: + show_help() + return + + command = sys.argv[1].lower() + + commands = { + "setup": setup, + "install": setup, + "api": start_api, + "backend": start_api, + "ui": start_ui, + "frontend": start_ui, + "example": start_example, + "target": start_example, + "all": start_all, + "start": start_all, + "test": run_tests, + "tests": run_tests, + "clean": clean, + "help": show_help, + "-h": show_help, + "--help": show_help, + } + + if command in commands: + commands[command]() + else: + print(f"Unknown command: {command}") + show_help() + + +if __name__ == "__main__": + main() diff --git a/streamlit_app/app.py b/streamlit_app/app.py new file mode 100644 index 00000000..9f6b1458 --- /dev/null +++ b/streamlit_app/app.py @@ -0,0 +1,58 @@ +""" +Streamlit App - Main entry point for the web UI. 
+""" + +import streamlit as st + +st.set_page_config(page_title="OpenAPI Test Generator", page_icon="🧪", layout="wide") + +st.title("🧪 OpenAPI Test Generator") + +st.markdown(""" +Welcome to the OpenAPI Test Generator! This tool helps you: + +1. **Upload** an OpenAPI specification +2. **Generate** pytest test files automatically +3. **Run** the tests against your API +4. **View** detailed results and failures + +### Getting Started + +Use the sidebar to navigate between pages: + +- **📄 Upload Spec** - Upload or paste your OpenAPI specification +- **⚙️ Generate Tests** - Generate pytest files from your spec +- **▶️ Run Tests** - Execute tests and view results + +### Quick Links + +- [OpenAPI Specification](https://swagger.io/specification/) +- [Pytest Documentation](https://docs.pytest.org/) +""") + +# Show quick stats if we have data +st.divider() +st.subheader("Current Session") + +col1, col2, col3 = st.columns(3) + +with col1: + spec_id = st.session_state.get("current_spec_id") + if spec_id: + st.metric("Current Spec", spec_id[:8] + "...") + else: + st.metric("Current Spec", "None") + +with col2: + gen_id = st.session_state.get("current_generation_id") + if gen_id: + st.metric("Current Generation", gen_id[:8] + "...") + else: + st.metric("Current Generation", "None") + +with col3: + run_id = st.session_state.get("last_run_id") + if run_id: + st.metric("Last Run", run_id[:8] + "...") + else: + st.metric("Last Run", "None") diff --git "a/streamlit_app/pages/1_\360\237\223\204_Upload_Spec.py" "b/streamlit_app/pages/1_\360\237\223\204_Upload_Spec.py" new file mode 100644 index 00000000..f3989703 --- /dev/null +++ "b/streamlit_app/pages/1_\360\237\223\204_Upload_Spec.py" @@ -0,0 +1,152 @@ +""" +Upload Spec Page - Upload or paste OpenAPI specifications. +""" + +import streamlit as st +from pathlib import Path +import sys + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from app.openapi_parser import parse_and_normalize, OpenAPIParseError +from app.storage.db import get_database +from app.config import config + +st.set_page_config(page_title="Upload Spec", page_icon="📄", layout="wide") + +st.title("📄 Upload OpenAPI Specification") + +# Load example spec +EXAMPLE_SPEC_PATH = ( + Path(__file__).parent.parent.parent / "openapi_specs" / "example_openapi.yaml" +) + + +def load_example_spec() -> str: + """Load the example OpenAPI spec""" + if EXAMPLE_SPEC_PATH.exists(): + return EXAMPLE_SPEC_PATH.read_text() + return "" + + +# Input methods +st.subheader("Input Method") +input_method = st.radio( + "Choose how to provide your OpenAPI spec:", + ["Paste Content", "Upload File", "Load Example"], + horizontal=True, +) + +spec_content = "" +spec_name = "" + +if input_method == "Paste Content": + spec_name = st.text_input("Spec Name", placeholder="My API Spec") + spec_content = st.text_area( + "Paste your OpenAPI YAML or JSON here:", + height=400, + placeholder="openapi: '3.0.0'\ninfo:\n title: My API\n version: '1.0.0'\npaths: {}", + ) + +elif input_method == "Upload File": + spec_name = st.text_input("Spec Name", placeholder="My API Spec") + uploaded_file = st.file_uploader( + "Upload OpenAPI YAML or JSON file", type=["yaml", "yml", "json"] + ) + if uploaded_file: + spec_content = uploaded_file.read().decode("utf-8") + if not spec_name: + spec_name = uploaded_file.name.rsplit(".", 1)[0] + st.code( + spec_content[:500] + "..." 
if len(spec_content) > 500 else spec_content, + language="yaml", + ) + +elif input_method == "Load Example": + spec_name = "Example Items API" + spec_content = load_example_spec() + if spec_content: + st.info("Loaded example OpenAPI spec for the Items API") + with st.expander("View Example Spec"): + st.code(spec_content, language="yaml") + else: + st.error("Example spec file not found") + +# Preview and Save +if spec_content: + st.divider() + st.subheader("Preview") + + try: + normalized = parse_and_normalize(spec_content) + + col1, col2, col3 = st.columns(3) + with col1: + st.metric("Title", normalized.title) + with col2: + st.metric("Version", normalized.version) + with col3: + st.metric("Endpoints", len(normalized.endpoints)) + + # Show endpoints table + st.subheader("Endpoints") + endpoints_data = [ + { + "Method": ep.method, + "Path": ep.path, + "Operation ID": ep.operation_id or "-", + "Summary": ep.summary or "-", + } + for ep in normalized.endpoints + ] + st.dataframe(endpoints_data, use_container_width=True) + + # Save button + st.divider() + if st.button("💾 Save Specification", type="primary", disabled=not spec_name): + if not spec_name: + st.error("Please provide a name for the specification") + else: + try: + db = get_database(config.DATABASE_PATH) + spec = db.create_spec( + name=spec_name, + raw_content=spec_content, + title=normalized.title, + version=normalized.version, + endpoint_count=len(normalized.endpoints), + ) + st.session_state["current_spec_id"] = spec.id + st.success(f"✅ Saved! Spec ID: `{spec.id}`") + st.info( + "Navigate to **Generate Tests** page to create tests for this spec." + ) + except Exception as e: + st.error(f"Failed to save: {e}") + + except OpenAPIParseError as e: + st.error(f"Invalid OpenAPI specification: {e}") + +# Show existing specs +st.divider() +st.subheader("Existing Specifications") + +db = get_database(config.DATABASE_PATH) +specs = db.list_specs() + +if specs: + for spec in specs: + with st.expander(f"📄 {spec.name} ({spec.title} v{spec.version})"): + col1, col2 = st.columns([3, 1]) + with col1: + st.write(f"**ID:** `{spec.id}`") + st.write(f"**Endpoints:** {spec.endpoint_count}") + st.write(f"**Created:** {spec.created_at}") + with col2: + if st.button("Select", key=f"select_{spec.id}"): + st.session_state["current_spec_id"] = spec.id + st.success(f"Selected spec: {spec.name}") + st.rerun() +else: + st.info("No specifications saved yet. Upload one above!") diff --git "a/streamlit_app/pages/2_\342\232\231\357\270\217_Generate_Tests.py" "b/streamlit_app/pages/2_\342\232\231\357\270\217_Generate_Tests.py" new file mode 100644 index 00000000..6b054a3b --- /dev/null +++ "b/streamlit_app/pages/2_\342\232\231\357\270\217_Generate_Tests.py" @@ -0,0 +1,232 @@ +""" +Generate Tests Page - Generate pytest files from specifications. +""" + +import os +import streamlit as st +from pathlib import Path +import sys + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from app.openapi_parser import parse_and_normalize +from app.generator import TestGenerator, get_llm_client, MockLLMClient +from app.storage.db import get_database +from app.config import config + +st.set_page_config(page_title="Generate Tests", page_icon="⚙️", layout="wide") + +st.title("⚙️ Generate Tests") + +# Get database and specs +db = get_database(config.DATABASE_PATH) +specs = db.list_specs() + +if not specs: + st.warning("No specifications found. 
Please upload a spec first.") + st.page_link("pages/1_📄_Upload_Spec.py", label="Go to Upload Spec", icon="📄") + st.stop() + +# Spec selection +st.subheader("Select Specification") + +spec_options = {f"{s.name} ({s.title} v{s.version})": s.id for s in specs} +default_index = 0 + +# Try to select current spec from session state +if "current_spec_id" in st.session_state: + for i, (name, sid) in enumerate(spec_options.items()): + if sid == st.session_state["current_spec_id"]: + default_index = i + break + +selected_name = st.selectbox( + "Choose a specification:", options=list(spec_options.keys()), index=default_index +) +selected_spec_id = spec_options[selected_name] + +# Get spec details +spec = db.get_spec(selected_spec_id) +if spec: + normalized = parse_and_normalize(spec.raw_content) + + col1, col2, col3 = st.columns(3) + with col1: + st.metric("Title", normalized.title) + with col2: + st.metric("Version", normalized.version) + with col3: + st.metric("Endpoints", len(normalized.endpoints)) + +# Generation options +st.divider() +st.subheader("LLM Configuration") + +# Provider selection +provider_options = { + "Mock (Deterministic - No API needed)": "mock", + "OpenAI (GPT-4, GPT-3.5)": "openai", + "Anthropic (Claude)": "anthropic", +} + +selected_provider_name = st.selectbox( + "Select LLM Provider:", + options=list(provider_options.keys()), + index=0, + help="Mock mode generates tests deterministically without any API calls. OpenAI and Anthropic require API keys.", +) +selected_provider = provider_options[selected_provider_name] + +# API Key and Model configuration (only show if not mock) +api_key = None +model = None + +if selected_provider != "mock": + st.info( + f"**{selected_provider_name}** requires an API key. " + "Your key is only used for this session and not stored." + ) + + col1, col2 = st.columns(2) + + with col1: + # Check for existing env var + env_key_name = ( + "OPENAI_API_KEY" if selected_provider == "openai" else "ANTHROPIC_API_KEY" + ) + existing_key = os.environ.get(env_key_name) or os.environ.get("LLM_API_KEY") + + if existing_key: + st.success(f"API key found in environment ({env_key_name})") + use_env_key = st.checkbox("Use environment variable key", value=True) + if use_env_key: + api_key = existing_key + else: + api_key = st.text_input( + "Enter API Key:", + type="password", + help="Enter your API key", + ) + else: + api_key = st.text_input( + f"Enter {selected_provider.title()} API Key:", + type="password", + help=f"Get your key from {'https://platform.openai.com/api-keys' if selected_provider == 'openai' else 'https://console.anthropic.com/settings/keys'}", + ) + + with col2: + if selected_provider == "openai": + model_options = [ + "gpt-4o-mini", + "gpt-4o", + "gpt-4-turbo", + "gpt-3.5-turbo", + ] + model = st.selectbox( + "Select Model:", + options=model_options, + index=0, + help="gpt-4o-mini is fast and cost-effective. gpt-4o is more capable.", + ) + else: + model_options = [ + "claude-3-haiku-20240307", + "claude-3-sonnet-20240229", + "claude-3-opus-20240229", + ] + model = st.selectbox( + "Select Model:", + options=model_options, + index=0, + help="Haiku is fast and cost-effective. Opus is most capable.", + ) + + # Validate API key + if not api_key: + st.warning("Please enter an API key to use this provider.") + +# Mock mode info +if selected_provider == "mock": + st.info( + "**Mock Mode**: Generates tests deterministically based on endpoint structure. " + "No external API calls are made. Ideal for CI/CD pipelines." 
+ ) + +# Generate button +st.divider() + +can_generate = selected_provider == "mock" or (api_key and len(api_key) > 10) + +if st.button("🚀 Generate Tests", type="primary", disabled=not can_generate): + with st.spinner("Generating tests..."): + try: + # Get LLM client based on selection + if selected_provider == "mock": + llm_client = MockLLMClient() + st.info("Using Mock LLM (deterministic)") + else: + llm_client = get_llm_client( + provider=selected_provider, api_key=api_key, model=model + ) + st.info(f"Using {selected_provider.title()} ({model})") + + # Generate tests + generator = TestGenerator( + llm_client=llm_client, output_dir=config.GENERATED_TESTS_DIR + ) + generated_files = generator.generate(normalized, selected_spec_id) + + # Save generation record + generation = db.create_generation( + spec_id=selected_spec_id, + test_dir=str(config.GENERATED_TESTS_DIR / selected_spec_id), + files=[str(f) for f in generated_files], + ) + + st.session_state["current_generation_id"] = generation.id + st.success(f"✅ Generated {len(generated_files)} test files!") + + # Show generated files + st.subheader("Generated Files") + for file_path in generated_files: + with st.expander(f"📄 {file_path.name}"): + content = file_path.read_text() + st.code(content, language="python") + + st.info("Navigate to **Run Tests** page to execute these tests.") + + except Exception as e: + st.error(f"Failed to generate tests: {e}") + import traceback + + st.code(traceback.format_exc()) + +if not can_generate and selected_provider != "mock": + st.warning("Please enter a valid API key to generate tests with this provider.") + +# Show existing generations +st.divider() +st.subheader("Previous Generations") + +generations = db.list_generations(spec_id=selected_spec_id) +if generations: + for gen in generations: + with st.expander(f"Generation {gen.id[:8]}... ({gen.status})"): + st.write(f"**ID:** `{gen.id}`") + st.write(f"**Created:** {gen.created_at}") + st.write(f"**Files:** {len(gen.files)}") + + if st.button("Select", key=f"select_gen_{gen.id}"): + st.session_state["current_generation_id"] = gen.id + st.success(f"Selected generation: {gen.id[:8]}...") + st.rerun() + + # Show files + for f in gen.files: + file_path = Path(f) + if file_path.exists(): + with st.expander(f"📄 {file_path.name}"): + st.code(file_path.read_text(), language="python") +else: + st.info("No generations found for this spec.") diff --git "a/streamlit_app/pages/3_\342\226\266\357\270\217_Run_Tests.py" "b/streamlit_app/pages/3_\342\226\266\357\270\217_Run_Tests.py" new file mode 100644 index 00000000..62db60a3 --- /dev/null +++ "b/streamlit_app/pages/3_\342\226\266\357\270\217_Run_Tests.py" @@ -0,0 +1,207 @@ +""" +Run Tests Page - Execute tests and view results. +""" + +import streamlit as st +from pathlib import Path +import sys + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from app.runner import run_tests +from app.storage.db import get_database +from app.config import config +import json + +st.set_page_config(page_title="Run Tests", page_icon="▶️", layout="wide") + +st.title("▶️ Run Tests") + +# Get database and generations +db = get_database(config.DATABASE_PATH) +generations = db.list_generations() + +if not generations: + st.warning("No test generations found. 
Please generate tests first.") + st.page_link("pages/2_⚙️_Generate_Tests.py", label="Go to Generate Tests", icon="⚙️") + st.stop() + +# Generation selection +st.subheader("Select Test Generation") + +gen_options = {} +for g in generations: + spec = db.get_spec(g.spec_id) + spec_name = spec.name if spec else "Unknown" + gen_options[ + f"{spec_name} - {g.id[:8]}... ({g.created_at.strftime('%Y-%m-%d %H:%M')})" + ] = g.id + +default_index = 0 +if "current_generation_id" in st.session_state: + for i, (name, gid) in enumerate(gen_options.items()): + if gid == st.session_state["current_generation_id"]: + default_index = i + break + +selected_name = st.selectbox( + "Choose a test generation:", options=list(gen_options.keys()), index=default_index +) +selected_gen_id = gen_options[selected_name] + +# Get generation details +generation = db.get_generation(selected_gen_id) +if generation: + st.write(f"**Test Directory:** `{generation.test_dir}`") + st.write(f"**Files:** {len(generation.files)}") + +# Run configuration +st.divider() +st.subheader("Run Configuration") + +target_url = st.text_input( + "Target Base URL", + value=config.DEFAULT_TARGET_URL, + help="The base URL of the API to test against", +) + +# Run button +st.divider() +if st.button("▶️ Run Tests", type="primary"): + with st.spinner("Running tests..."): + try: + # Create run record + run = db.create_run(generation_id=selected_gen_id, target_url=target_url) + + # Execute tests + test_dir = Path(generation.test_dir) + result = run_tests( + test_dir=test_dir, base_url=target_url, timeout=config.TEST_TIMEOUT + ) + + # Update run with results + status_str = "completed" if result.success else "failed" + db.update_run( + run_id=run.id, + status=status_str, + passed=result.passed, + failed=result.failed, + skipped=result.skipped, + errors=result.errors, + total=result.total, + duration=result.duration, + junit_xml_path=str(result.junit_xml_path) + if result.junit_xml_path + else None, + results_json=json.dumps(result.to_dict()), + ) + + st.session_state["last_run_id"] = run.id + + # Show results + if result.success: + st.success("✅ All tests passed!") + else: + st.error("❌ Some tests failed") + + # Results summary + st.subheader("Results Summary") + + col1, col2, col3, col4, col5 = st.columns(5) + with col1: + st.metric("Passed", result.passed, delta_color="normal") + with col2: + st.metric( + "Failed", + result.failed, + delta_color="inverse" if result.failed > 0 else "off", + ) + with col3: + st.metric("Skipped", result.skipped) + with col4: + st.metric( + "Errors", + result.errors, + delta_color="inverse" if result.errors > 0 else "off", + ) + with col5: + st.metric("Duration", f"{result.duration:.2f}s") + + # Show failures + if result.failures: + st.subheader("Failures") + for failure in result.failures: + with st.expander(f"❌ {failure.name}", expanded=True): + st.write(f"**Class:** {failure.classname}") + if failure.message: + st.error(failure.message) + if failure.traceback: + st.code(failure.traceback, language="python") + + # Show stdout/stderr + with st.expander("📋 Test Output"): + tab1, tab2 = st.tabs(["stdout", "stderr"]) + with tab1: + st.code(result.stdout or "No output", language="text") + with tab2: + st.code(result.stderr or "No errors", language="text") + + # Download buttons + col1, col2 = st.columns(2) + with col1: + if result.junit_xml_path and result.junit_xml_path.exists(): + junit_content = result.junit_xml_path.read_text() + st.download_button( + "📥 Download JUnit XML", + data=junit_content, + file_name="junit.xml", + 
mime="application/xml", + ) + with col2: + st.download_button( + "📥 Download Results JSON", + data=json.dumps(result.to_dict(), indent=2), + file_name="results.json", + mime="application/json", + ) + + except Exception as e: + st.error(f"Failed to run tests: {e}") + import traceback + + st.code(traceback.format_exc()) + +# Show previous runs +st.divider() +st.subheader("Previous Runs") + +runs = db.list_runs(generation_id=selected_gen_id) +if runs: + for run in runs: + status_icon = "✅" if run.status == "completed" and run.failed == 0 else "❌" + with st.expander(f"{status_icon} Run {run.id[:8]}... ({run.status})"): + col1, col2, col3, col4 = st.columns(4) + with col1: + st.metric("Passed", run.passed) + with col2: + st.metric("Failed", run.failed) + with col3: + st.metric("Duration", f"{run.duration:.2f}s") + with col4: + st.write(f"**Created:** {run.created_at}") + + if run.results_json: + try: + results = json.loads(run.results_json) + failures = results.get("failures", []) + if failures: + st.subheader("Failures") + for f in failures: + st.error( + f"**{f['name']}**: {f.get('message', 'No message')}" + ) + except json.JSONDecodeError: + pass +else: + st.info("No previous runs for this generation.") diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..d4839a6b --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# Tests package diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..9687221d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,17 @@ +""" +Pytest configuration and shared fixtures. +""" + +import sys +from pathlib import Path + +# Add the project root to the Python path +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + + +def reset_database_singleton(): + """Reset the global database singleton to ensure test isolation""" + from app.storage import db as db_module + + db_module._db = None diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py new file mode 100644 index 00000000..f06d7f78 --- /dev/null +++ b/tests/e2e/__init__.py @@ -0,0 +1 @@ +# E2E tests package diff --git a/tests/e2e/test_full_workflow.py b/tests/e2e/test_full_workflow.py new file mode 100644 index 00000000..733327f1 --- /dev/null +++ b/tests/e2e/test_full_workflow.py @@ -0,0 +1,301 @@ +""" +End-to-end tests for the complete workflow. + +These tests run the full pipeline: +1. Start example API +2. Upload spec +3. Generate tests +4. Run tests against example API +5. 
Verify results +""" + +import pytest +import subprocess +import time +import tempfile +import shutil +from pathlib import Path +from fastapi.testclient import TestClient + + +# Read the example OpenAPI spec +EXAMPLE_SPEC_PATH = ( + Path(__file__).parent.parent.parent / "openapi_specs" / "example_openapi.yaml" +) + + +@pytest.fixture(scope="module") +def example_api(): + """Start the example API server for testing""" + # Start the example API + proc = subprocess.Popen( + [ + "python", + "-m", + "uvicorn", + "example_api.main:app", + "--port", + "9999", + "--host", + "127.0.0.1", + ], + cwd=str(Path(__file__).parent.parent.parent), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + # Wait for it to start + time.sleep(2) + + # Check if it's running + import httpx + + for _ in range(10): + try: + response = httpx.get("http://127.0.0.1:9999/health", timeout=1) + if response.status_code == 200: + break + except Exception: + time.sleep(0.5) + + yield "http://127.0.0.1:9999" + + # Cleanup + proc.terminate() + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + proc.kill() + + +@pytest.fixture +def temp_dir(): + """Create a temporary directory for test artifacts""" + temp = tempfile.mkdtemp() + yield Path(temp) + # On Windows, we need to ignore errors when deleting temp files + # as SQLite may still have the file locked + shutil.rmtree(temp, ignore_errors=True) + + +@pytest.fixture +def client(temp_dir, monkeypatch): + """Create test client with isolated database""" + from app.main import app + from app.storage import db as db_module + from app import config as config_module + + # Reset the database singleton FIRST + db_module._db = None + + # Set environment variables for test + monkeypatch.setenv("DATABASE_PATH", str(temp_dir / "test.db")) + monkeypatch.setenv("GENERATED_TESTS_DIR", str(temp_dir / "generated_tests")) + + # Update config to use temp paths (must be done after singleton reset) + config_module.config.DATABASE_PATH = temp_dir / "test.db" + config_module.config.GENERATED_TESTS_DIR = temp_dir / "generated_tests" + + # Force create new database with correct path + db_module.get_database(temp_dir / "test.db") + + yield TestClient(app) + + # Cleanup: reset database singleton to release file handle + db_module._db = None + + +class TestFullWorkflow: + """Full end-to-end workflow tests""" + + @pytest.mark.skipif(not EXAMPLE_SPEC_PATH.exists(), reason="Example spec not found") + def test_complete_workflow(self, client, example_api, temp_dir): + """Test the complete workflow: upload -> generate -> run""" + # 1. Upload the example spec + spec_content = EXAMPLE_SPEC_PATH.read_text() + response = client.post( + "/specs", json={"name": "Example API", "content": spec_content} + ) + assert response.status_code == 201 + spec_id = response.json()["id"] + + # 2. Generate tests + response = client.post("/generate", json={"spec_id": spec_id}) + assert response.status_code == 201 + generation = response.json() + assert generation["status"] == "completed" + assert len(generation["files"]) > 0 + generation_id = generation["id"] + + # 3. Run tests against example API + response = client.post( + "/runs", json={"generation_id": generation_id, "target_url": example_api} + ) + assert response.status_code == 201 + run = response.json() + assert run["status"] in ["completed", "failed"] + assert run["total"] > 0 + run_id = run["id"] + + # 4. 
Get detailed results
+        response = client.get(f"/runs/{run_id}")
+        assert response.status_code == 200
+        details = response.json()
+        assert "failures" in details
+
+        # 5. Verify JUnit XML is available
+        response = client.get(f"/runs/{run_id}/junit")
+        assert response.status_code == 200
+        assert "<testsuite" in response.text
+        assert run["passed"] > 0, "Expected at least some tests to pass"
+
+        # The health check test should definitely pass
+        if run["failed"] > 0:
+            # Get failure details
+            response = client.get(f"/runs/{run['id']}")
+            details = response.json()
+            print("Failures:", details.get("failures", []))
+
+    def test_workflow_with_inline_spec(self, client, temp_dir):
+        """Test workflow with an inline minimal spec (no external API needed)"""
+        # Create a minimal spec
+        spec = """
+openapi: "3.0.0"
+info:
+  title: Minimal API
+  version: "1.0.0"
+paths:
+  /test:
+    get:
+      operationId: test_endpoint
+      responses:
+        "200":
+          description: OK
+"""
+        # Upload spec
+        response = client.post("/specs", json={"name": "Minimal", "content": spec})
+        assert response.status_code == 201
+        spec_id = response.json()["id"]
+
+        # Generate tests
+        response = client.post("/generate", json={"spec_id": spec_id})
+        assert response.status_code == 201
+        generation = response.json()
+
+        # Verify files were generated
+        assert len(generation["files"]) > 0
+
+        # Verify test file contains expected content
+        for file_path in generation["files"]:
+            path = Path(file_path)
+            if path.name.startswith("test_"):
+                content = path.read_text()
+                assert "def test_" in content
+                assert "client" in content
+
+
+class TestGeneratedTestsContent:
+    """Tests to verify the content of generated tests"""
+
+    def test_generated_tests_are_valid_python(self, client, temp_dir):
+        """Generated test files should be syntactically valid Python"""
+        import ast
+
+        spec = """
+openapi: "3.0.0"
+info:
+  title: Test API
+  version: "1.0.0"
+paths:
+  /items:
+    get:
+      operationId: list_items
+      responses:
+        "200":
+          description: OK
+    post:
+      operationId: create_item
+      requestBody:
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                name:
+                  type: string
+      responses:
+        "201":
+          description: Created
+"""
+        # Upload and generate
+        response = client.post("/specs", json={"name": "Test", "content": spec})
+        spec_id = response.json()["id"]
+
+        response = client.post("/generate", json={"spec_id": spec_id})
+        generation = response.json()
+
+        # Verify all files are valid Python
+        for file_path in generation["files"]:
+            path = Path(file_path)
+            content = path.read_text()
+            try:
+                ast.parse(content)
+            except SyntaxError as e:
+                pytest.fail(f"Generated file {path.name} is not valid Python: {e}")
+
+    def test_conftest_has_fixtures(self, client, temp_dir):
+        """conftest.py should have required fixtures"""
+        spec = """
+openapi: "3.0.0"
+info:
+  title: Test API
+  version: "1.0.0"
+paths:
+  /test:
+    get:
+      responses:
+        "200":
+          description: OK
+"""
+        # Upload and generate
+        response = client.post("/specs", json={"name": "Test", "content": spec})
+        spec_id = response.json()["id"]
+
+        response = client.post("/generate", json={"spec_id": spec_id})
+        generation = response.json()
+
+        # Find conftest.py
+        conftest_path = None
+        for file_path in generation["files"]:
+            if Path(file_path).name == "conftest.py":
+                conftest_path = Path(file_path)
+                break
+
+        assert conftest_path is not None, "conftest.py should be generated"
+        content = conftest_path.read_text()
+
+        assert "def base_url" in content, "Should have base_url fixture"
+        assert "def client" in content, "Should have client fixture"
+        assert "@pytest.fixture" 
in content, "Should use pytest fixtures" diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 00000000..a2650482 --- /dev/null +++ b/tests/integration/__init__.py @@ -0,0 +1 @@ +# Integration tests package diff --git a/tests/integration/test_api_endpoints.py b/tests/integration/test_api_endpoints.py new file mode 100644 index 00000000..e38ccfef --- /dev/null +++ b/tests/integration/test_api_endpoints.py @@ -0,0 +1,260 @@ +""" +Integration tests for FastAPI endpoints. +""" + +import pytest +from fastapi.testclient import TestClient +from pathlib import Path +import tempfile +import shutil + +from app.main import app + + +# Sample OpenAPI spec for testing +SAMPLE_SPEC = """ +openapi: "3.0.0" +info: + title: Test API + version: "1.0.0" +paths: + /health: + get: + operationId: health_check + responses: + "200": + description: OK + /items: + get: + operationId: list_items + responses: + "200": + description: List of items + post: + operationId: create_item + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - name + properties: + name: + type: string + responses: + "201": + description: Created + "422": + description: Validation error + /items/{id}: + get: + operationId: get_item + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + "200": + description: Item found + "404": + description: Not found +""" + + +@pytest.fixture +def temp_dir(): + """Create a temporary directory for test artifacts""" + temp = tempfile.mkdtemp() + yield Path(temp) + # On Windows, we need to ignore errors when deleting temp files + # as SQLite may still have the file locked + shutil.rmtree(temp, ignore_errors=True) + + +@pytest.fixture(autouse=False) +def client(temp_dir, monkeypatch): + """Create test client with isolated database""" + # Import modules + from app.storage import db as db_module + from app import config as config_module + + # Reset the database singleton FIRST + db_module._db = None + + # Set environment variables for test + monkeypatch.setenv("DATABASE_PATH", str(temp_dir / "test.db")) + monkeypatch.setenv("GENERATED_TESTS_DIR", str(temp_dir / "generated_tests")) + + # Update config to use temp paths (must be done after singleton reset) + config_module.config.DATABASE_PATH = temp_dir / "test.db" + config_module.config.GENERATED_TESTS_DIR = temp_dir / "generated_tests" + + # Force create new database with correct path + db_module.get_database(temp_dir / "test.db") + + yield TestClient(app) + + # Cleanup: reset database singleton to release file handle + db_module._db = None + + +class TestHealthEndpoint: + """Tests for /health endpoint""" + + def test_health_check(self, client): + """Health endpoint should return ok status""" + response = client.get("/health") + assert response.status_code == 200 + assert response.json() == {"status": "ok"} + + +class TestSpecEndpoints: + """Tests for /specs endpoints""" + + def test_create_spec(self, client): + """POST /specs should create and return spec""" + response = client.post( + "/specs", json={"name": "Test API", "content": SAMPLE_SPEC} + ) + + assert response.status_code == 201 + data = response.json() + assert "id" in data + assert data["name"] == "Test API" + assert data["title"] == "Test API" + assert data["version"] == "1.0.0" + assert data["endpoint_count"] == 4 + + def test_create_spec_invalid(self, client): + """POST /specs with invalid spec should return 400""" + response = client.post( + "/specs", json={"name": 
"Invalid", "content": "not valid yaml {{{"} + ) + + assert response.status_code == 400 + assert "Invalid OpenAPI spec" in response.json()["detail"] + + def test_list_specs(self, client): + """GET /specs should return list of specs""" + # Create a spec first + client.post("/specs", json={"name": "Test", "content": SAMPLE_SPEC}) + + response = client.get("/specs") + assert response.status_code == 200 + data = response.json() + assert isinstance(data, list) + assert len(data) == 1 + + def test_get_spec(self, client): + """GET /specs/{id} should return spec details""" + # Create a spec first + create_response = client.post( + "/specs", json={"name": "Test", "content": SAMPLE_SPEC} + ) + spec_id = create_response.json()["id"] + + response = client.get(f"/specs/{spec_id}") + assert response.status_code == 200 + data = response.json() + assert data["id"] == spec_id + assert "endpoints" in data + assert len(data["endpoints"]) == 4 + + def test_get_spec_not_found(self, client): + """GET /specs/{id} with invalid id should return 404""" + response = client.get("/specs/nonexistent-id") + assert response.status_code == 404 + + def test_delete_spec(self, client): + """DELETE /specs/{id} should delete spec""" + # Create a spec first + create_response = client.post( + "/specs", json={"name": "Test", "content": SAMPLE_SPEC} + ) + spec_id = create_response.json()["id"] + + # Delete it + response = client.delete(f"/specs/{spec_id}") + assert response.status_code == 204 + + # Verify it's gone + response = client.get(f"/specs/{spec_id}") + assert response.status_code == 404 + + +class TestGenerationEndpoints: + """Tests for /generate endpoints""" + + def test_generate_tests(self, client): + """POST /generate should create test files""" + # Create a spec first + create_response = client.post( + "/specs", json={"name": "Test", "content": SAMPLE_SPEC} + ) + spec_id = create_response.json()["id"] + + # Generate tests + response = client.post("/generate", json={"spec_id": spec_id}) + assert response.status_code == 201 + data = response.json() + assert "id" in data + assert data["spec_id"] == spec_id + assert data["status"] == "completed" + assert len(data["files"]) > 0 + + def test_generate_spec_not_found(self, client): + """POST /generate with invalid spec_id should return 404""" + response = client.post("/generate", json={"spec_id": "nonexistent"}) + assert response.status_code == 404 + + def test_get_generation(self, client): + """GET /generations/{id} should return generation details""" + # Create spec and generate + create_response = client.post( + "/specs", json={"name": "Test", "content": SAMPLE_SPEC} + ) + spec_id = create_response.json()["id"] + gen_response = client.post("/generate", json={"spec_id": spec_id}) + gen_id = gen_response.json()["id"] + + response = client.get(f"/generations/{gen_id}") + assert response.status_code == 200 + data = response.json() + assert data["id"] == gen_id + + def test_list_generations(self, client): + """GET /generations should return list""" + # Create spec and generate + create_response = client.post( + "/specs", json={"name": "Test", "content": SAMPLE_SPEC} + ) + spec_id = create_response.json()["id"] + client.post("/generate", json={"spec_id": spec_id}) + + response = client.get("/generations") + assert response.status_code == 200 + data = response.json() + assert isinstance(data, list) + assert len(data) == 1 + + +class TestRunEndpoints: + """Tests for /runs endpoints""" + + def test_list_runs_empty(self, client): + """GET /runs should return empty list initially""" + 
response = client.get("/runs") + assert response.status_code == 200 + assert response.json() == [] + + def test_get_run_not_found(self, client): + """GET /runs/{id} with invalid id should return 404""" + response = client.get("/runs/nonexistent") + assert response.status_code == 404 + + # Note: Full run tests require a running target API + # Those are covered in e2e tests diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 00000000..4a5d2636 --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1 @@ +# Unit tests package diff --git a/tests/unit/test_generator.py b/tests/unit/test_generator.py new file mode 100644 index 00000000..a64ff0f3 --- /dev/null +++ b/tests/unit/test_generator.py @@ -0,0 +1,285 @@ +""" +Unit tests for the test generator. +""" + +import ast +import pytest + +from app.openapi_parser import ( + NormalizedSpec, + Endpoint, + Parameter, + RequestBody, + Response, +) +from app.generator.llm_client import MockLLMClient, GeneratedTestCase +from app.generator.test_generator import PytestGenerator + + +class TestMockLLMClient: + """Tests for MockLLMClient""" + + @pytest.fixture + def client(self): + return MockLLMClient() + + @pytest.fixture + def sample_spec(self): + return NormalizedSpec( + title="Test API", version="1.0.0", endpoints=[], schemas={} + ) + + def test_generates_deterministic_output(self, client, sample_spec): + """MockLLMClient should produce identical output for identical input""" + endpoint = Endpoint( + path="/items", + method="GET", + operation_id="list_items", + responses=[Response(status_code=200, description="OK")], + ) + + result1 = client.generate_test_plan(endpoint, sample_spec) + result2 = client.generate_test_plan(endpoint, sample_spec) + + assert len(result1.test_cases) == len(result2.test_cases) + assert result1.test_cases[0].name == result2.test_cases[0].name + assert ( + result1.test_cases[0].expected_status + == result2.test_cases[0].expected_status + ) + + def test_generates_happy_path_for_get(self, client, sample_spec): + """Should generate happy path test for GET endpoint""" + endpoint = Endpoint( + path="/items", + method="GET", + operation_id="list_items", + responses=[Response(status_code=200, description="OK")], + ) + + result = client.generate_test_plan(endpoint, sample_spec) + + assert len(result.test_cases) >= 1 + happy_path = result.test_cases[0] + assert "success" in happy_path.name + assert happy_path.expected_status == 200 + + def test_generates_happy_path_for_post(self, client, sample_spec): + """Should generate happy path test for POST endpoint""" + endpoint = Endpoint( + path="/items", + method="POST", + operation_id="create_item", + request_body=RequestBody( + schema={ + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + } + ), + responses=[Response(status_code=201, description="Created")], + ) + + result = client.generate_test_plan(endpoint, sample_spec) + + happy_path = result.test_cases[0] + assert happy_path.expected_status == 201 + assert happy_path.request_body is not None + + def test_generates_404_test_for_path_params(self, client, sample_spec): + """Should generate 404 test for endpoints with path parameters""" + endpoint = Endpoint( + path="/items/{id}", + method="GET", + operation_id="get_item", + parameters=[Parameter(name="id", location="path", required=True)], + responses=[ + Response(status_code=200, description="OK"), + Response(status_code=404, description="Not found"), + ], + ) + + result = client.generate_test_plan(endpoint, 
sample_spec) + + not_found_tests = [t for t in result.test_cases if t.expected_status == 404] + assert len(not_found_tests) == 1 + assert "not_found" in not_found_tests[0].name + + def test_generates_validation_test_for_post(self, client, sample_spec): + """Should generate validation error test for POST endpoints""" + endpoint = Endpoint( + path="/items", + method="POST", + operation_id="create_item", + request_body=RequestBody( + required=True, + schema={ + "type": "object", + "properties": {"name": {"type": "string"}}, + "required": ["name"], + }, + ), + responses=[ + Response(status_code=201, description="Created"), + Response(status_code=422, description="Validation Error"), + ], + ) + + result = client.generate_test_plan(endpoint, sample_spec) + + validation_tests = [t for t in result.test_cases if t.expected_status == 422] + assert len(validation_tests) == 1 + assert validation_tests[0].request_body == {} + + def test_generates_tests_for_all_endpoints(self, client, sample_spec): + """Should generate test plans for all endpoints in spec""" + sample_spec.endpoints = [ + Endpoint( + path="/health", method="GET", responses=[Response(status_code=200)] + ), + Endpoint( + path="/items", method="GET", responses=[Response(status_code=200)] + ), + Endpoint( + path="/items", method="POST", responses=[Response(status_code=201)] + ), + ] + + results = client.generate_tests_for_spec(sample_spec) + + assert len(results) == 3 + + +class TestPytestGenerator: + """Tests for PytestGenerator""" + + @pytest.fixture + def generator(self, tmp_path): + return PytestGenerator(output_dir=tmp_path) + + @pytest.fixture + def sample_spec(self): + return NormalizedSpec( + title="Test API", + version="1.0.0", + base_url="http://localhost:8001", + endpoints=[ + Endpoint( + path="/health", + method="GET", + operation_id="health_check", + responses=[Response(status_code=200, description="OK")], + ), + Endpoint( + path="/items", + method="GET", + operation_id="list_items", + responses=[Response(status_code=200, description="OK")], + ), + Endpoint( + path="/items", + method="POST", + operation_id="create_item", + request_body=RequestBody( + schema={ + "type": "object", + "properties": {"name": {"type": "string"}}, + } + ), + responses=[ + Response(status_code=201, description="Created"), + Response(status_code=422, description="Validation Error"), + ], + ), + Endpoint( + path="/items/{id}", + method="GET", + operation_id="get_item", + parameters=[Parameter(name="id", location="path", required=True)], + responses=[ + Response(status_code=200, description="OK"), + Response(status_code=404, description="Not Found"), + ], + ), + ], + schemas={}, + ) + + def test_generates_files(self, generator, sample_spec, tmp_path): + """Should generate test files""" + files = generator.generate(sample_spec, "test-spec-1") + + assert len(files) > 0 + assert all(f.exists() for f in files) + + def test_generates_conftest(self, generator, sample_spec, tmp_path): + """Should generate conftest.py with fixtures""" + generator.generate(sample_spec, "test-spec-1") + + conftest = tmp_path / "test-spec-1" / "conftest.py" + assert conftest.exists() + content = conftest.read_text() + assert "def base_url" in content + assert "def client" in content + assert "http://localhost:8001" in content + + def test_generated_code_is_valid_python(self, generator, sample_spec, tmp_path): + """Generated test files should be syntactically valid Python""" + files = generator.generate(sample_spec, "test-spec-1") + + for file_path in files: + content = 
file_path.read_text() + try: + ast.parse(content) + except SyntaxError as e: + pytest.fail(f"Generated file {file_path} has syntax error: {e}") + + def test_test_names_are_stable(self, generator, sample_spec, tmp_path): + """Test names should be stable across generations""" + files1 = generator.generate(sample_spec, "test-spec-1") + files2 = generator.generate(sample_spec, "test-spec-2") + + # Compare content (excluding conftest which has the same content) + for f1, f2 in zip(sorted(files1), sorted(files2)): + if f1.name == f2.name and f1.name != "conftest.py": + content1 = f1.read_text() + content2 = f2.read_text() + # Test names should match + assert content1 == content2 + + def test_groups_tests_by_resource(self, generator, sample_spec, tmp_path): + """Should group tests by resource (first path segment)""" + files = generator.generate(sample_spec, "test-spec-1") + + test_files = [f for f in files if f.name.startswith("test_")] + # Should have test_health.py and test_items.py + file_names = {f.name for f in test_files} + assert "test_health.py" in file_names + assert "test_items.py" in file_names + + def test_generates_httpx_requests(self, generator, sample_spec, tmp_path): + """Generated tests should use httpx client""" + files = generator.generate(sample_spec, "test-spec-1") + + for file_path in files: + if file_path.name.startswith("test_"): + content = file_path.read_text() + # Should use client fixture methods + assert "client." in content + + +class TestGeneratedTestCase: + """Tests for GeneratedTestCase dataclass""" + + def test_defaults(self): + """GeneratedTestCase should have sensible defaults""" + tc = GeneratedTestCase( + name="test_example", + description="Test", + method="GET", + path="/test", + expected_status=200, + ) + assert tc.assertions == [] + assert tc.request_body is None + assert tc.path_params is None diff --git a/tests/unit/test_junit_parser.py b/tests/unit/test_junit_parser.py new file mode 100644 index 00000000..e4637a60 --- /dev/null +++ b/tests/unit/test_junit_parser.py @@ -0,0 +1,244 @@ +""" +Unit tests for JUnit XML parser. 
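+
+These tests feed the parser pytest-style JUnit XML. As a sketch (attribute
+names follow pytest's --junitxml output), a minimal report looks like:
+
+    <testsuite name="pytest" tests="1" failures="0" errors="0" skipped="0" time="0.1">
+        <testcase name="test_ok" classname="tests.test_mod" time="0.1"/>
+    </testsuite>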
+""" + +import pytest +from app.runner.junit_parser import ( + parse_junit_xml, + get_failures, + JUnitReport, + JUnitTestCase, +) + + +class TestParseJunitXml: + """Tests for parse_junit_xml function""" + + def test_parse_empty_testsuite(self): + """Should parse empty test suite""" + xml = """ + +""" + + report = parse_junit_xml(xml) + + assert report.total_tests == 0 + assert report.total_passed == 0 + assert report.total_failures == 0 + assert report.success is True + + def test_parse_all_passing(self): + """Should parse test suite with all passing tests""" + xml = """ + + + + +""" + + report = parse_junit_xml(xml) + + assert report.total_tests == 3 + assert report.total_passed == 3 + assert report.total_failures == 0 + assert report.success is True + assert len(report.suites) == 1 + assert len(report.suites[0].test_cases) == 3 + + def test_parse_with_failures(self): + """Should parse test suite with failures""" + xml = """ + + + + +def test_fail(): + assert 1 == 2 +AssertionError: assert 1 == 2 + + +""" + + report = parse_junit_xml(xml) + + assert report.total_tests == 2 + assert report.total_passed == 1 + assert report.total_failures == 1 + assert report.success is False + + failures = get_failures(report) + assert len(failures) == 1 + assert failures[0].name == "test_fail" + assert "assert 1 == 2" in failures[0].failure_message + + def test_parse_with_errors(self): + """Should parse test suite with errors""" + xml = """ + + + +Traceback... + + +""" + + report = parse_junit_xml(xml) + + assert report.total_tests == 1 + assert report.total_errors == 1 + assert report.success is False + + failures = get_failures(report) + assert len(failures) == 1 + assert failures[0].status == "error" + + def test_parse_with_skipped(self): + """Should parse test suite with skipped tests""" + xml = """ + + + + + +""" + + report = parse_junit_xml(xml) + + assert report.total_tests == 2 + assert report.total_passed == 1 + assert report.total_skipped == 1 + assert report.success is True + + def test_parse_testsuites_root(self): + """Should parse XML with testsuites as root element""" + xml = """ + + + + + + + +""" + + report = parse_junit_xml(xml) + + assert len(report.suites) == 2 + assert report.total_tests == 2 + + def test_parse_invalid_xml_raises_error(self): + """Should raise error for invalid XML""" + xml = "not valid xml <<<" + + with pytest.raises(ValueError, match="Invalid XML"): + parse_junit_xml(xml) + + def test_parse_unexpected_root_raises_error(self): + """Should raise error for unexpected root element""" + xml = """""" + + with pytest.raises(ValueError, match="Unexpected root element"): + parse_junit_xml(xml) + + def test_extracts_stdout_stderr(self): + """Should extract system-out and system-err""" + xml = """ + + + This is stdout + This is stderr + +""" + + report = parse_junit_xml(xml) + + tc = report.suites[0].test_cases[0] + assert tc.stdout == "This is stdout" + assert tc.stderr == "This is stderr" + + def test_calculates_duration(self): + """Should calculate total duration from suites""" + xml = """ + + + + + + + +""" + + report = parse_junit_xml(xml) + + assert report.total_time == 4.0 + + +class TestGetFailures: + """Tests for get_failures function""" + + def test_returns_empty_for_all_passing(self): + """Should return empty list when all tests pass""" + report = JUnitReport(total_tests=2, total_passed=2) + assert get_failures(report) == [] + + def test_returns_failed_tests(self): + """Should return failed test cases""" + from app.runner.junit_parser import TestSuiteResult + + 
report = JUnitReport( + suites=[ + TestSuiteResult( + name="test", + tests=2, + failures=1, + errors=0, + skipped=0, + time=1.0, + test_cases=[ + JUnitTestCase( + name="test_pass", classname="t", time=0.1, status="passed" + ), + JUnitTestCase( + name="test_fail", + classname="t", + time=0.1, + status="failed", + failure_message="assertion failed", + ), + ], + ) + ], + total_tests=2, + total_failures=1, + ) + + failures = get_failures(report) + + assert len(failures) == 1 + assert failures[0].name == "test_fail" + assert failures[0].failure_message == "assertion failed" + + +class TestJUnitReport: + """Tests for JUnitReport dataclass""" + + def test_success_property_true(self): + """success should be True when no failures or errors""" + report = JUnitReport( + total_tests=5, total_passed=5, total_failures=0, total_errors=0 + ) + assert report.success is True + + def test_success_property_false_with_failures(self): + """success should be False when there are failures""" + report = JUnitReport( + total_tests=5, total_passed=4, total_failures=1, total_errors=0 + ) + assert report.success is False + + def test_success_property_false_with_errors(self): + """success should be False when there are errors""" + report = JUnitReport( + total_tests=5, total_passed=4, total_failures=0, total_errors=1 + ) + assert report.success is False diff --git a/tests/unit/test_openapi_parser.py b/tests/unit/test_openapi_parser.py new file mode 100644 index 00000000..8b9b63d2 --- /dev/null +++ b/tests/unit/test_openapi_parser.py @@ -0,0 +1,345 @@ +""" +Unit tests for OpenAPI parser. +""" + +import pytest +from app.openapi_parser import ( + parse_spec, + normalize_spec, + parse_and_normalize, + OpenAPIParseError, + NormalizedSpec, +) # noqa: F401 + + +class TestParseSpec: + """Tests for parse_spec function""" + + def test_parse_valid_yaml(self): + """Should parse valid YAML content""" + content = """ +openapi: "3.0.0" +info: + title: Test API + version: "1.0.0" +paths: {} +""" + result = parse_spec(content) + assert result["openapi"] == "3.0.0" + assert result["info"]["title"] == "Test API" + + def test_parse_valid_json(self): + """Should parse valid JSON content""" + content = '{"openapi": "3.0.0", "info": {"title": "Test API", "version": "1.0.0"}, "paths": {}}' + result = parse_spec(content) + assert result["openapi"] == "3.0.0" + assert result["info"]["title"] == "Test API" + + def test_parse_empty_content_raises_error(self): + """Should raise error for empty content""" + with pytest.raises(OpenAPIParseError, match="Empty spec content"): + parse_spec("") + + def test_parse_whitespace_only_raises_error(self): + """Should raise error for whitespace-only content""" + with pytest.raises(OpenAPIParseError, match="Empty spec content"): + parse_spec(" \n\t ") + + def test_parse_invalid_json_raises_error(self): + """Should raise error for invalid JSON""" + with pytest.raises(OpenAPIParseError, match="Invalid JSON"): + parse_spec('{"invalid": json}') + + def test_parse_invalid_yaml_raises_error(self): + """Should raise error for invalid YAML""" + with pytest.raises(OpenAPIParseError, match="Invalid YAML"): + parse_spec("key: [unclosed") + + def test_parse_non_object_raises_error(self): + """Should raise error when spec is not an object""" + with pytest.raises(OpenAPIParseError, match="must be a YAML/JSON object"): + parse_spec("- item1\n- item2") + + +class TestNormalizeSpec: + """Tests for normalize_spec function""" + + def test_normalize_minimal_spec(self): + """Should normalize a minimal valid spec""" + raw = { + 
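+            # The smallest document normalize_spec accepts: a version marker,
+            # an info block, and an (empty) paths object.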
"openapi": "3.0.0", + "info": {"title": "Test API", "version": "1.0.0"}, + "paths": {}, + } + result = normalize_spec(raw) + assert result.title == "Test API" + assert result.version == "1.0.0" + assert result.endpoints == [] + + def test_normalize_extracts_base_url(self): + """Should extract base URL from servers""" + raw = { + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "servers": [{"url": "http://localhost:8000"}], + "paths": {}, + } + result = normalize_spec(raw) + assert result.base_url == "http://localhost:8000" + + def test_normalize_missing_openapi_version_raises_error(self): + """Should raise error when openapi version is missing""" + raw = {"info": {"title": "Test", "version": "1.0"}, "paths": {}} + with pytest.raises(OpenAPIParseError, match="Missing 'openapi' version"): + normalize_spec(raw) + + def test_normalize_swagger_2_raises_error(self): + """Should raise error for Swagger 2.x specs""" + raw = { + "swagger": "2.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + } + with pytest.raises( + OpenAPIParseError, match="Swagger/OpenAPI 2.x not supported" + ): + normalize_spec(raw) + + def test_normalize_missing_title_raises_error(self): + """Should raise error when title is missing""" + raw = {"openapi": "3.0.0", "info": {"version": "1.0"}, "paths": {}} + with pytest.raises( + OpenAPIParseError, match="Missing required field: info.title" + ): + normalize_spec(raw) + + def test_normalize_missing_version_raises_error(self): + """Should raise error when version is missing""" + raw = {"openapi": "3.0.0", "info": {"title": "Test"}, "paths": {}} + with pytest.raises( + OpenAPIParseError, match="Missing required field: info.version" + ): + normalize_spec(raw) + + def test_normalize_extracts_endpoints(self): + """Should extract all endpoints with correct methods""" + raw = { + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": {"summary": "List items", "responses": {"200": {}}}, + "post": {"summary": "Create item", "responses": {"201": {}}}, + }, + "/items/{id}": { + "get": {"summary": "Get item", "responses": {"200": {}}} + }, + }, + } + result = normalize_spec(raw) + assert len(result.endpoints) == 3 + + # Check endpoints are sorted + paths_methods = [(e.path, e.method) for e in result.endpoints] + assert ("/items", "GET") in paths_methods + assert ("/items", "POST") in paths_methods + assert ("/items/{id}", "GET") in paths_methods + + def test_normalize_extracts_parameters(self): + """Should extract path and query parameters""" + raw = { + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items/{id}": { + "get": { + "parameters": [ + { + "name": "id", + "in": "path", + "required": True, + "schema": {"type": "string"}, + }, + { + "name": "include", + "in": "query", + "schema": {"type": "string"}, + }, + ], + "responses": {"200": {}}, + } + } + }, + } + result = normalize_spec(raw) + endpoint = result.endpoints[0] + assert len(endpoint.parameters) == 2 + + id_param = next(p for p in endpoint.parameters if p.name == "id") + assert id_param.location == "path" + assert id_param.required is True + + include_param = next(p for p in endpoint.parameters if p.name == "include") + assert include_param.location == "query" + assert include_param.required is False + + def test_normalize_extracts_request_body(self): + """Should extract request body schema""" + raw = { + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "post": { 
+ "requestBody": { + "required": True, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {"name": {"type": "string"}}, + } + } + }, + }, + "responses": {"201": {}}, + } + } + }, + } + result = normalize_spec(raw) + endpoint = result.endpoints[0] + assert endpoint.request_body is not None + assert endpoint.request_body.required is True + assert endpoint.request_body.content_type == "application/json" + assert endpoint.request_body.schema["type"] == "object" + + def test_normalize_extracts_responses(self): + """Should extract response schemas""" + raw = { + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "get": { + "responses": { + "200": { + "description": "Success", + "content": { + "application/json": {"schema": {"type": "array"}} + }, + }, + "404": {"description": "Not found"}, + } + } + } + }, + } + result = normalize_spec(raw) + endpoint = result.endpoints[0] + assert len(endpoint.responses) == 2 + + ok_response = next(r for r in endpoint.responses if r.status_code == 200) + assert ok_response.description == "Success" + assert ok_response.schema["type"] == "array" + + not_found = next(r for r in endpoint.responses if r.status_code == 404) + assert not_found.description == "Not found" + assert not_found.schema is None + + def test_normalize_resolves_schema_refs(self): + """Should resolve $ref references in schemas""" + raw = { + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/items": { + "post": { + "requestBody": { + "content": { + "application/json": { + "schema": {"$ref": "#/components/schemas/Item"} + } + } + }, + "responses": {"201": {}}, + } + } + }, + "components": { + "schemas": { + "Item": { + "type": "object", + "required": ["name"], + "properties": {"name": {"type": "string"}}, + } + } + }, + } + result = normalize_spec(raw) + endpoint = result.endpoints[0] + assert endpoint.request_body.schema["type"] == "object" + assert "name" in endpoint.request_body.schema["properties"] + + def test_normalize_extracts_schemas(self): + """Should extract component schemas""" + raw = { + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": {}, + "components": { + "schemas": {"Item": {"type": "object"}, "Error": {"type": "object"}} + }, + } + result = normalize_spec(raw) + assert "Item" in result.schemas + assert "Error" in result.schemas + + +class TestParseAndNormalize: + """Tests for the convenience function""" + + def test_parse_and_normalize_yaml(self): + """Should parse and normalize YAML in one step""" + content = """ +openapi: "3.0.0" +info: + title: Test API + version: "1.0.0" +paths: + /health: + get: + responses: + "200": + description: OK +""" + result = parse_and_normalize(content) + assert isinstance(result, NormalizedSpec) + assert result.title == "Test API" + assert len(result.endpoints) == 1 + assert result.endpoints[0].path == "/health" + + +class TestEndpointOrdering: + """Tests for deterministic endpoint ordering""" + + def test_endpoints_sorted_by_path_then_method(self): + """Endpoints should be sorted by path, then by method""" + raw = { + "openapi": "3.0.0", + "info": {"title": "Test", "version": "1.0"}, + "paths": { + "/z": {"post": {"responses": {}}, "get": {"responses": {}}}, + "/a": {"delete": {"responses": {}}, "get": {"responses": {}}}, + }, + } + result = normalize_spec(raw) + + paths_methods = [(e.path, e.method) for e in result.endpoints] + # Should be sorted: /a first, then /z; within each path, methods sorted 
alphabetically + assert paths_methods == [ + ("/a", "DELETE"), + ("/a", "GET"), + ("/z", "GET"), + ("/z", "POST"), + ]
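+
+        # Stable ordering is likely what keeps generated file and test names
+        # deterministic across runs (see test_test_names_are_stable).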