diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..22819dbd --- /dev/null +++ b/.env.example @@ -0,0 +1,3 @@ +# Optional configuration. Defaults are applied if unset. +RAG_MIN_SCORE=0.25 +# API_URL=http://127.0.0.1:8000 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..a7c8946a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,50 @@ +name: CI + +on: + push: + branches: + - main + - feature/customer-faq-assistant-cameron-d + pull_request: + branches: + - main + - feature/customer-faq-assistant-cameron-d + workflow_dispatch: + +concurrency: + group: ci-${{ github.ref }} + cancel-in-progress: true + +jobs: + test-matrix: + name: Tests (Python ${{ matrix.python-version }}) + runs-on: ubuntu-latest + timeout-minutes: 25 + env: + PIP_EXTRA_INDEX_URL: https://download.pytorch.org/whl/cpu + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + cache-dependency-path: requirements.txt + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + python -m pip install tox + + - name: Run tests + env: + PY_VER: ${{ matrix.python-version }} + run: python -m tox -e py${PY_VER//./} diff --git a/.gitignore b/.gitignore index e69de29b..4cce859c 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,57 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +build/ +dist/ +*.egg-info/ +.eggs/ + +# Virtual environments +.venv/ +.venv*/ +venv/ +env/ +ENV/ + +# Tool caches +.mypy_cache/ +.pytest_cache/ +.ruff_cache/ +.coverage +.coverage.* +htmlcov/ +.tox/ +.nox/ + +# Logs +*.log + +# Environment / secrets +.env +.env.* +!.env.example + +# Jupyter +.ipynb_checkpoints/ + +# IDE/editor files +.vscode/ +.idea/ +*.swp +*.swo + +# OS files +.DS_Store +Thumbs.db + +# Streamlit local secrets/config overrides +.streamlit/secrets.toml + +# Local vector DB / embeddings artifacts +chroma/ +chroma_db/ +.chroma/ +*.sqlite3 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..e7a51a76 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.11-slim + +WORKDIR /app + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +COPY requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r /app/requirements.txt + +COPY app /app/app +COPY data /app/data + +EXPOSE 8000 + +CMD ["python", "-m", "uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md index 494f1c75..eabe74e9 100644 --- a/README.md +++ b/README.md @@ -1,43 +1,144 @@ -# Candidate Assessment: Spec-Driven Development With Codegen Tools - -This assessment evaluates how you use modern code generation tools (for example `5.2-Codex`, `Claude`, `Copilot`, and similar) to design, build, and test a software application using a spec-driven development pattern. You may build a frontend, a backend, or both. - -## Goals -- Build a working application with at least one meaningful feature. -- Create a testing framework to validate the application. -- Demonstrate effective use of code generation tools to accelerate delivery. -- Show clear, maintainable engineering practices. - -## Deliverables -- Application source code in this repository. 
-- A test suite and test harness that can be run locally. -- Documentation that explains how to run the app and the tests. - -## Scope Options -Pick one: -- Frontend-only application. -- Backend-only application. -- Full-stack application. - -Your solution should include at least one real workflow, for example: -- Create and view a resource. -- Search or filter data. -- Persist data in memory or storage. - -## Rules -- You must use a code generation tool (for example `5.2-Codex`, `Claude`, or similar). You can use multiple tools. -- You must build the application and a testing framework for it. -- The application and tests must run locally. -- Do not include secrets or credentials in this repository. - -## Evaluation Criteria -- Working product: Does the app do what it claims? -- Test coverage: Do tests cover key workflows and edge cases? -- Engineering quality: Clarity, structure, and maintainability. -- Use of codegen: How effectively you used tools to accelerate work. -- Documentation: Clear setup and run instructions. - -## What to Submit -- When you are complete, put up a Pull Request against this repository with your changes. -- A short summary of your approach and tools used in your PR submission -- Any additional information or approach that helped you. +# Customer FAQ Assistant (Mockridge Bank) + +[![CI](https://github.com/CameronDetig/spec-driven-development/actions/workflows/ci.yml/badge.svg?branch=feature/customer-faq-assistant-cameron-d)](https://github.com/CameronDetig/spec-driven-development/actions/workflows/ci.yml) +![Python](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue) + +Spec-driven local RAG assistant built with FastAPI + Streamlit. + +## What It Does +- Answers customer questions relating to products and services for a fictional bank (Mockridge Bank) +- Available through API or Streamlit UI +- Retrieves relevant local FAQ documents from ChromaDB +- Generates answers with: + - `mock` deterministic mode (default, test-friendly) + - `flan-t5` optional mode (small locally running instruction-tuned LLM, no API key required) +- Returns answer + cited sources + +## Tech Stack +- Python 3.10, 3.11, or 3.12 +- FastAPI +- Streamlit +- ChromaDB +- LangChain (`langchain`, `langchain-huggingface`, `langchain-chroma`) +- pytest + +## Preview + +Streamlit UI + +## Quick Start + +### 1) Setup +```bash +python run.py setup +``` + +What setup does: +- creates/uses `.venv` (unless `--no-venv`) +- installs dependencies +- builds retrieval DB + +If you want to be able to use the LLM option, run setup with this command. It will download the flan-t5 model (~300MB): +```bash +python run.py setup --with-llm +``` + +### 2) Run Full Stack +```bash +python run.py fullstack +``` + +Endpoints: +- API: `http://127.0.0.1:8000` +- UI: `http://127.0.0.1:8501` + +### API Documentation +When the API is running, interactive documentation is available at: +- Swagger UI: http://localhost:8000/docs +- ReDoc: http://localhost:8000/redoc + +## Run Commands + +```bash +python run.py help +python run.py api +python run.py ui +python run.py fullstack +python run.py test +python run.py test-matrix +``` + +`test-matrix` runs tox environments for available interpreters (`py310`, `py311`, `py312`). 
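
Once the API is up, you can exercise it straight from another shell. A minimal smoke test (assuming the default port; the DB persists locally, so `/db/build` is only needed once):

```bash
curl http://127.0.0.1:8000/health
curl -X POST http://127.0.0.1:8000/db/build
curl -X POST http://127.0.0.1:8000/ask \
  -H "Content-Type: application/json" \
  -d '{"question": "What can I do with the mobile app?", "top_k": 3, "generator": "mock"}'
```
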
+ +## Testing + +Single environment: +```bash +python run.py test +``` + +Multi-python matrix: +```bash +python run.py test-matrix +``` + +Direct: +```bash +python -m pytest -q +tox +``` + +## CI (GitHub Actions) + +Workflow: `.github/workflows/ci.yml` + +Triggers: +- push to: + - `main` + - `feature/customer-faq-assistant-cameron-d` +- pull_request to: + - `main` + - `feature/customer-faq-assistant-cameron-d` +- manual (`workflow_dispatch`) + +CI job: +- Python matrix: 3.10 / 3.11 / 3.12 +- installs `requirements.txt` +- runs `pytest -q` + +## API Overview + +- `GET /health` +- `GET /db/status` +- `POST /db/build` +- `POST /ask` + +Example request: +```json +{ + "question": "What can I do with the mobile app?", + "top_k": 3, + "generator": "mock" +} +``` + +## Environment Variables + +- `RAG_MIN_SCORE` (minimum similarity score for retrieved documents. default: `0.25`) +- `API_URL` (used by Streamlit UI; default `http://127.0.0.1:8000`) + +## Project Structure + +- `app/main.py` API routes +- `app/retrieval.py` RAG retrieval/indexing logic +- `app/generation.py` generator selection and LLM adapter +- `app/rag_chain.py` LangChain prompt/chain +- `ui/streamlit_app.py` UI +- `SPECS/` authoritative feature specs +- `tests/` test suite +- `run.py` project entrypoint + +## Notes +- Default mode is deterministic and intended for local testing/CI. +- Optional `flan-t5` mode uses Google's Flan-T5-small (80M params) for local experimentation and requires model assets (~308MB download via `python run.py setup --with-llm`). +- Project uses CPU-only PyTorch for faster setup and smaller footprint (~200MB vs ~2.5GB). GPU acceleration is unnecessary for the small models used in this project. \ No newline at end of file diff --git a/SPECS/ask-endpoint-validation.md b/SPECS/ask-endpoint-validation.md new file mode 100644 index 00000000..836ba1a2 --- /dev/null +++ b/SPECS/ask-endpoint-validation.md @@ -0,0 +1,44 @@ +# Feature Spec: Ask Endpoint Validation + +## Goal +- Enforce strict, predictable request validation for `POST /ask` so API behavior is safe and testable. + +## Scope +- In: + - Validate request body fields `question` and `top_k`. + - Return consistent 400 errors for invalid input. +- Out: + - Retrieval ranking behavior. + - Answer generation quality. + +## Requirements +- Endpoint: + - `POST /ask` +- Request schema: + - `question` is required string with length `5..300`. + - `top_k` is optional integer with default `3` and valid range `1..5`. + - `generator` is optional string with allowed values `mock` or `flan-t5`. +- Validation failures MUST return HTTP 400. +- Validation failures include: + - Missing `question`. + - `question` length below 5. + - `question` length above 300. + - `top_k` below 1. + - `top_k` above 5. + - `generator` not in allowed values. +- Error responses MUST be JSON and machine-parseable. +- Error responses MUST include a top-level `detail` field suitable for user-facing validation feedback. +- Validation MUST run before retrieval or generation logic executes. +- If DB is not built, endpoint MUST return `503` with actionable guidance to build DB first. + +## Related Specifications +- `ask-response-contract.md` - Defines success response schema for valid requests +- `retrieval-pipeline.md` - Processes validated requests + +## Acceptance Criteria +- [x] Missing `question` returns `400`. (test_validation.py::test_ask_missing_question_returns_400) +- [x] `question` shorter than 5 characters returns `400`. 
(test_validation.py::test_ask_question_too_short_returns_400) +- [x] `question` longer than 300 characters returns `400`. (test_validation.py::test_ask_question_too_long_returns_400) +- [x] `top_k = 0` returns `400`. (test_validation.py::test_ask_top_k_zero_returns_400) +- [x] `top_k > 5` returns `400`. (test_validation.py::test_ask_top_k_above_range_returns_400) +- [x] Omitted `top_k` is accepted and treated as `3`. (test_validation.py::test_ask_omitted_top_k_uses_default) diff --git a/SPECS/ask-response-contract.md b/SPECS/ask-response-contract.md new file mode 100644 index 00000000..ca970d74 --- /dev/null +++ b/SPECS/ask-response-contract.md @@ -0,0 +1,46 @@ +# Feature Spec: Ask Response Contract + +## Goal +- Guarantee a stable JSON response contract from `POST /ask` across success and fallback paths. + +## Scope +- In: + - Response schema for status `200`. + - Source object field contract. + - Retrieval metadata contract. +- Out: + - HTTP error schema details outside `POST /ask` success response. + +## Requirements +- `POST /ask` successful response MUST include keys: + - `answer` + - `sources` + - `retrieval` +- `answer` MUST be a non-empty string. +- `sources` MUST be an array (possibly empty). +- Each `sources` item MUST include: + - `id` (non-empty string) + - `title` (non-empty string) + - `snippet` (non-empty string) + - `score` (number) +- `retrieval` MUST include: + - `top_k` (integer) + - `matched` (integer) +- Contract MUST remain stable for: + - Matched retrieval response. + - Fallback response (no sources). +- In fallback response: + - `sources` MUST equal `[]`. + - `retrieval.matched` MUST equal `0`. + +## Related Specifications +- `ask-endpoint-validation.md` - Defines request validation before response generation +- `retrieval-pipeline.md` - Provides source data for response +- `generation-mock.md` - Generates answer content for response + +## Acceptance Criteria +- [x] Contract test validates required top-level keys exist on every 200 response. (test_contract.py::test_ask_response_contract_contains_required_top_level_keys) +- [x] Contract test validates source item field presence and scalar types. (test_contract.py::test_ask_response_sources_have_required_fields_when_present) +- [x] Contract test validates `answer`, `id`, `title`, and `snippet` are non-empty strings. (test_contract.py::test_ask_response_sources_have_required_fields_when_present) +- [x] Fallback path preserves full schema and uses empty `sources`. (test_contract.py::test_ask_fallback_keeps_stable_schema) +- [x] Retrieval metadata fields are always present and consistent with payload. (test_contract.py::test_ask_response_retrieval_metadata_has_required_fields) diff --git a/SPECS/docker-optional.md b/SPECS/docker-optional.md new file mode 100644 index 00000000..da1b7734 --- /dev/null +++ b/SPECS/docker-optional.md @@ -0,0 +1,35 @@ +# Feature Spec: Optional Docker Runtime + +## Goal +- Provide an optional containerized workflow for running the Customer FAQ Assistant with consistent local environments. + +## Scope +- In: + - Optional `Dockerfile` for the API runtime. + - Optional `docker-compose.yml` for API + Streamlit UI orchestration. + - Clear commands for building and running containers locally. + - Non-blocking usage that does not replace the default local workflow. +- Out: + - Mandatory Docker dependency for development or test execution. + - Production-grade orchestration, autoscaling, or cloud deployment. 
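
For orientation, the optional workflow is expected to look roughly like this (illustrative sketch only; assumes Docker Compose v2 and the service names defined in `docker-compose.yml`):

```bash
docker compose build   # build the API and UI images
docker compose up      # start API + UI together
docker compose down    # stop and remove the services
```
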
## Requirements
- Docker support MUST be optional and MUST NOT be required to run tests locally.
- Default reviewer workflow MUST remain:
  - local Python setup
  - local `pytest` execution
- If implemented, Docker artifacts MUST include:
  - `Dockerfile` for the API service
  - `docker-compose.yml` for the API and UI services
- Container configuration MUST avoid embedding secrets or credentials.
- Docker workflow MUST expose the default local ports for the API and Streamlit UI.
- Documentation MUST state:
  - Docker is optional
  - Local non-Docker setup is fully supported
  - How to build and run the Docker services

## Acceptance Criteria
- [x] Project runs locally without Docker and all required tests execute.
- [x] API can be built and started via Docker. (Dockerfile, docker-compose.yml::api)
- [x] Optional compose workflow can start API and UI together. (docker-compose.yml, run.py::cmd_docker_fullstack)
- [x] Documentation clearly separates optional Docker commands from the default local workflow. (README.md)
diff --git a/SPECS/entrypoint-cli.md b/SPECS/entrypoint-cli.md
new file mode 100644
index 00000000..33b834f7
--- /dev/null
+++ b/SPECS/entrypoint-cli.md
@@ -0,0 +1,60 @@
# Feature Spec: Entrypoint CLI (run.py)

## Goal
- Provide a single cross-platform entry point for setup, running services, and tests.

## Scope
- In:
  - A `run.py` script that manages setup, API, UI, and test commands.
  - Cross-platform support for macOS, Linux, and Windows.
  - Clear help output with available commands.
- Out:
  - Full environment management beyond project dependencies.
  - Complex process supervision or daemonization.

## Requirements
- `run.py` MUST support these commands:
  - `setup`: Install dependencies.
  - `setup --with-llm`: Install dependencies and download LLM assets.
  - `api`: Run the FastAPI backend.
  - `ui`: Run the Streamlit UI.
  - `fullstack`: Run API and UI concurrently.
  - `test`: Run pytest.
  - `test-matrix`: Run the tox-based multi-Python test matrix.
  - `help`: Show available commands and examples.
- `run.py` SHOULD support optional Docker helper commands:
  - `docker-build`: Build container images.
  - `docker-api`: Run the API container.
  - `docker-fullstack`: Run the API and UI containers together.
  - `docker-down`: Stop/remove running Docker Compose services.
- `run.py` MUST be cross-platform and use `sys.executable` for subprocess calls.
- `run.py` MUST default to creating and using a local `.venv` for setup.
- `run.py` MUST support a `--no-venv` option to install into the current environment instead.
- `run.py setup` MUST detect and prefer a supported Python interpreter for virtual environment creation.
- Supported interpreter range for full project setup MUST be:
  - `>=3.10`
  - `<3.13`
- `run.py setup` SHOULD prefer `3.12`, then `3.11`, then `3.10`, before falling back.
- `run.py setup` MUST support overriding interpreter selection via `--python <path>`.
- If no supported interpreter is found, `run.py setup` MUST fail with actionable guidance.
- `run.py setup` SHOULD support optional automated Python installation via `--install-python` and fail clearly if package-manager installation is unavailable.
- `fullstack` command MUST start API and UI on their default ports and shut down cleanly on Ctrl+C.
- Commands MUST print clear status messages and fail clearly with actionable errors.
- `test` and `test-matrix` commands SHOULD be suitable for GitHub Actions CI execution without interactive prompts.
- Docker helper commands MUST fail clearly when Docker is unavailable and MUST keep local non-Docker commands fully usable.

## Acceptance Criteria
- [x] `python run.py help` prints usage and available commands. (run.py::show_help)
- [x] `python run.py setup` installs dependencies without requiring manual venv steps. (run.py::cmd_setup)
- [x] `python run.py setup --with-llm` downloads LLM assets for flan-t5. (run.py::cmd_setup + _download_llm_assets)
- [x] `python run.py setup --no-venv` installs dependencies into the current environment. (run.py::cmd_setup)
- [x] `python run.py setup` automatically selects a supported interpreter (`3.12`/`3.11`/`3.10`) for `.venv` creation when available. (run.py::_select_supported_python_command)
- [x] `python run.py setup --python <path>` uses the specified interpreter when supported. (run.py::cmd_setup, lines 248-253)
- [x] `python run.py setup` fails with clear guidance when no supported interpreter is available. (run.py::cmd_setup, lines 264-270)
- [x] `python run.py setup --install-python` attempts package-manager Python installation and then continues setup when possible. (run.py::cmd_setup + _attempt_python_install)
- [x] `python run.py api` starts the backend. (run.py::cmd_api)
- [x] `python run.py ui` starts the Streamlit UI. (run.py::cmd_ui)
- [x] `python run.py fullstack` starts API and UI together and stops them on Ctrl+C. (run.py::cmd_fullstack)
- [x] `python run.py test` runs pytest successfully. (run.py::cmd_test)
- [x] `python run.py test-matrix` runs tox matrix environments (for available local interpreters). (run.py::cmd_test_matrix)
- [x] `python run.py docker-build`, `python run.py docker-api`, `python run.py docker-fullstack`, and `python run.py docker-down` work when Docker is installed and fail with clear guidance when Docker is unavailable. (run.py::cmd_docker_*)
diff --git a/SPECS/faq-data.md b/SPECS/faq-data.md
new file mode 100644
index 00000000..18849a42
--- /dev/null
+++ b/SPECS/faq-data.md
@@ -0,0 +1,46 @@
# Feature Spec: FAQ Corpus Data

## Goal
- Define a small, local, version-controlled FAQ corpus that powers retrieval tests and API behavior.

## Scope
- In:
  - Local FAQ files under `data/`.
  - Required document fields and minimum corpus breadth.
  - Deterministic, test-friendly content expectations.
- Out:
  - External content sources or dynamic ingestion pipelines.

## Requirements
- Corpus MUST contain between 8 and 15 documents.
- Corpus content MUST use the fictitious institution name `Mockridge Bank`.
- Each document MUST include:
  - `id` (unique identifier)
  - `title` (document title)
  - `body` (document content)
- For markdown documents, required fields MUST be explicitly present as:
  - `id: <value>`
  - `title: <value>`
- Corpus MUST represent core Mockridge Bank topics, including:
  - Checking accounts
  - Savings accounts
  - Auto loans
  - Credit cards
  - Overdraft fees
  - Fraud/disputes
  - Mobile app
  - Support hours
- Document content MUST be stable and human-readable.
- Corpus format MAY be markdown or JSON, but parser behavior MUST be documented.
- Document IDs MUST be unique across the corpus.
- Corpus MUST be local and committed to the repository.
- Corpus MUST NOT include sensitive data, credentials, or personal information.

## Related Specifications
- `retrieval-pipeline.md` - Consumes FAQ corpus for semantic search

## Acceptance Criteria
- [x] Data loader can parse all corpus files without runtime errors.
(test_data_loader.py::test_faq_docs_have_required_fields_and_non_empty_values) +- [x] Document IDs are unique and non-empty. (test_data_loader.py::test_faq_document_ids_are_unique) +- [x] At least one retrieval test depends on known corpus content and passes. (test_retrieval.py::test_known_query_returns_expected_top_document) +- [x] Corpus size is within defined range (8-15). (test_data_loader.py::test_faq_corpus_size_within_expected_range) diff --git a/SPECS/feature-template.md b/SPECS/feature-template.md index 7dbc70a5..0340cac8 100644 --- a/SPECS/feature-template.md +++ b/SPECS/feature-template.md @@ -1,14 +1,17 @@ # Feature Spec: ## Goal -- +- ## Scope - In: + - - Out: + - ## Requirements -- +- + - ## Acceptance Criteria -- [ ] \ No newline at end of file +- [ ] diff --git a/SPECS/generation-mock.md b/SPECS/generation-mock.md new file mode 100644 index 00000000..240fe869 --- /dev/null +++ b/SPECS/generation-mock.md @@ -0,0 +1,35 @@ +# Feature Spec: Deterministic Mock Generation + +## Goal +- Provide a default answer generator that is deterministic, locally runnable, and independent of external model downloads. + +## Scope +- In: + - Implement a mock/extractive generator as the default generation path. + - Build answer text from retrieved FAQ snippets. + - Define fallback answer behavior for unmatched retrieval. +- Out: + - Human-like conversational quality optimization. + - Probabilistic/creative generation behavior. + +## Requirements +- Default generator MUST be selected when no generator is specified or when `generator=mock`. +- Generator MUST not require network access, API keys, or model downloads. +- For matched retrieval: + - Answer MUST be constructed from retrieved content deterministically. + - Same input and same retrieval set MUST produce identical output. +- For unmatched retrieval: + - Return a safe fallback answer. + - `sources` MUST be empty in API response. +- Generator interface MUST be cleanly swappable with optional LLM generator. + +## Related Specifications +- `generation-optional-llm.md` - Alternative LLM-based generator (opt-in) +- `retrieval-pipeline.md` - Provides source documents for answer generation +- `ask-response-contract.md` - Defines answer field in response schema + +## Acceptance Criteria +- [x] Tests run fully with `RAG_GENERATOR=mock` and no model downloads. (conftest.py::default_generator_env) +- [x] Repeated identical requests produce identical answers. (test_determinism.py::test_same_input_produces_same_output) +- [x] Matched retrieval path returns a non-empty answer derived from source content. (test_contract.py::test_ask_response_contract_contains_required_top_level_keys) +- [x] Unmatched retrieval path returns fallback answer and empty sources. (test_retrieval.py::test_unknown_query_returns_fallback_with_empty_sources) diff --git a/SPECS/generation-optional-llm.md b/SPECS/generation-optional-llm.md new file mode 100644 index 00000000..4d39819b --- /dev/null +++ b/SPECS/generation-optional-llm.md @@ -0,0 +1,40 @@ +# Feature Spec: Optional Flan-T5 Generation + +## Goal +- Enable an optional local LLM generation mode for runtime experimentation without affecting baseline determinism or test portability. + +## Scope +- In: + - Support `generator=flan-t5` request option. + - Implement a separate generator path backed by Hugging Face `transformers`, with LangChain pipeline integration when available. + - Document runtime behavior and first-run model download expectations. +- Out: + - CI dependency on LLM mode. 
+ - Any requirement for LLM mode during tests. + +## Requirements +- LLM mode MUST be opt-in via request field: + - `generator=flan-t5` +- Default mode MUST remain `mock`. +- LLM mode MUST NOT be required to start or test default application workflow. +- When LangChain Hugging Face integration is available, `flan-t5` mode SHOULD execute through the LangChain adapter path. +- If LLM mode is selected and model assets are unavailable: + - System MUST fail clearly with actionable local setup guidance. + - UI MUST instruct user to run `python run.py setup --with-llm`. +- LLM mode MUST NOT require secrets or API keys. +- Repository MUST NOT commit model weight files. +- Documentation MUST clearly state: + - LLM mode is optional. + - First-run download size/cost is local disk/network only. + - Tests run without LLM mode. + - How to install LLM assets via `python run.py setup --with-llm`. + +## Related Specifications +- `generation-mock.md` - Default deterministic generator (required for tests) +- `retrieval-pipeline.md` - Provides source documents for LLM context + +## Acceptance Criteria +- [x] With default environment, app uses mock generator. (test_generator_config.py::test_default_generator_mode_is_mock_deterministic) +- [x] With `generator=flan-t5`, app routes generation through LLM adapter. (test_generator_config.py::test_flan_t5_mode_is_opt_in_and_fails_clearly_when_unavailable) +- [x] Test suite does not depend on `flan-t5`. (conftest.py::default_generator_env) +- [x] Documentation explains optional setup and non-requirement for tests. (README.md sections: Quick Start, Notes) diff --git a/SPECS/health-endpoint.md b/SPECS/health-endpoint.md new file mode 100644 index 00000000..1ba2009f --- /dev/null +++ b/SPECS/health-endpoint.md @@ -0,0 +1,26 @@ +# Feature Spec: Health Endpoint + +## Goal +- Provide a stable service-health signal for local development, CI checks, and smoke tests. + +## Scope +- In: + - Implement `GET /health`. + - Return a deterministic JSON body and HTTP 200. +- Out: + - Metrics, dependency health checks, authentication, and readiness/liveness split. + +## Requirements +- `GET /health` MUST return HTTP 200. +- Response body MUST be exactly: + - `{ "status": "ok" }` +- Response content type MUST be JSON. +- Endpoint behavior MUST be deterministic and independent of retrieval/generation subsystems. + +## Related Specifications +- None - Health endpoint is independent of other features + +## Acceptance Criteria +- [x] Calling `GET /health` returns status code `200`. (test_health.py::test_health_returns_ok) +- [x] Response JSON includes key `status` with value `ok`. (test_health.py::test_health_returns_ok) +- [x] No API keys, external services, or model downloads are required. (test_health.py::test_health_returns_ok) diff --git a/SPECS/retrieval-pipeline.md b/SPECS/retrieval-pipeline.md new file mode 100644 index 00000000..c6357d87 --- /dev/null +++ b/SPECS/retrieval-pipeline.md @@ -0,0 +1,56 @@ +# Feature Spec: Retrieval Pipeline + +## Goal +- Retrieve the most relevant local FAQ documents for a customer question using embeddings and local ChromaDB, orchestrated through LangChain. + +## Scope +- In: + - Embed incoming question with `all-MiniLM-L6-v2`. + - Query local persisted ChromaDB for top-k matches (via LangChain vector store integration). + - Expose DB lifecycle endpoints for build/status. + - Return scored, sorted source candidates. + - Apply minimum relevance threshold rule. + - Support deterministic lexical fallback if local embedding assets are unavailable. 
+- Out: + - Final answer wording strategy. + - External document ingestion services. + +## Requirements +- Retrieval MUST use local FAQ corpus data only. +- Retrieval MUST use local ChromaDB persistence (no remote vector DB). +- API MUST expose: + - `GET /db/status` with DB build status and counts. + - `POST /db/build` to build/index FAQ embeddings. +- Query flow: + - Embed question with `all-MiniLM-L6-v2` via LangChain Hugging Face embeddings (SentenceTransformers backend). + - Query ChromaDB using `top_k` via LangChain Chroma integration. + - Map results to source items with `id`, `title`, `snippet`, `score`. +- If embedding/vector retrieval cannot initialize due to missing local model assets: + - System MUST fall back to deterministic lexical retrieval over local FAQ docs. + - Response contract and threshold behavior MUST remain unchanged. +- Sources MUST be sorted by descending relevance score. +- If no document satisfies relevance threshold: + - Retrieval result MUST be treated as unmatched. + - Downstream response MUST use fallback behavior. +- Relevance threshold MUST be configurable via environment variable: + - `RAG_MIN_SCORE` + - Default value: `0.25` +- Retrieval metadata MUST include: + - `top_k` as the effective query size. + - `matched` as number of documents included in `sources`. +- Retrieval behavior MUST be deterministic for the same corpus and input. +- If retrieval DB is not built, `/ask` MUST return `503` with actionable guidance. + +## Related Specifications +- `faq-data.md` - Defines corpus structure that retrieval depends on +- `ask-endpoint-validation.md` - Validates request before retrieval +- `generation-mock.md` - Consumes retrieval results to generate answers + +## Acceptance Criteria +- [x] A known banking query retrieves an expected FAQ document as top result. (test_retrieval.py::test_known_query_returns_expected_top_document) +- [x] Changing `top_k` changes the maximum returned source count accordingly. (test_retrieval.py::test_top_k_controls_maximum_number_of_sources) +- [x] Source list is sorted by score descending. (test_retrieval.py::test_sources_are_sorted_by_descending_score) +- [x] Unknown/out-of-domain query triggers unmatched retrieval path. (test_retrieval.py::test_unknown_query_returns_fallback_with_empty_sources) +- [x] Retrieval metadata reports `top_k` and `matched` accurately. (test_contract.py::test_ask_response_retrieval_metadata_has_required_fields) +- [X] `GET /db/status` reports whether DB is built and indexed counts. (manual acceptance) +- [X] `POST /db/build` builds the DB and makes status built=true. (manual acceptance) diff --git a/SPECS/spec-traceability.md b/SPECS/spec-traceability.md new file mode 100644 index 00000000..ff7132a0 --- /dev/null +++ b/SPECS/spec-traceability.md @@ -0,0 +1,54 @@ +# Feature Spec: Spec Traceability Matrix + +## Goal +- Provide explicit traceability between specifications, tests, and implementation modules. + +## Scope +- In: + - Mapping of each spec to planned test files. + - Mapping of each spec to planned application modules. +- Out: + - Detailed test-case code. + - Release management process. + +## Requirements +- Every feature spec in `SPECS/` MUST map to at least one test file. +- Every feature spec in `SPECS/` MUST map to at least one implementation module. +- Traceability document MUST be updated when adding/changing feature specs. + +## Acceptance Criteria +- [x] Matrix includes each current spec file in `SPECS/`. +- [x] Matrix lists at least one test target per spec. 
+- [x] Matrix lists at least one implementation target per spec. + +## Mapping +- `SPECS/health-endpoint.md` + - Tests: `tests/test_health.py` + - Implementation: `app/main.py` +- `SPECS/ask-endpoint-validation.md` + - Tests: `tests/test_validation.py` + - Implementation: `app/models.py`, `app/main.py` +- `SPECS/retrieval-pipeline.md` + - Tests: `tests/test_retrieval.py`, `tests/test_db.py` + - Implementation: `app/retrieval.py` +- `SPECS/generation-mock.md` + - Tests: `tests/test_determinism.py`, `tests/test_retrieval.py`, `tests/test_contract.py` + - Implementation: `app/generation.py` +- `SPECS/generation-optional-llm.md` + - Tests: `tests/test_generator_config.py` (optional/non-blocking), existing suite in mock mode + - Implementation: `app/generation.py`, `app/main.py` +- `SPECS/ask-response-contract.md` + - Tests: `tests/test_contract.py` + - Implementation: `app/models.py`, `app/main.py` +- `SPECS/faq-data.md` + - Tests: `tests/test_data_loader.py`, `tests/test_retrieval.py` + - Implementation: `app/retrieval.py`, `data/*` +- `SPECS/streamlit-ui.md` + - Tests: `tests/test_streamlit_smoke.py`, `tests/test_streamlit_ui_logic.py`, manual acceptance checks + - Implementation: `ui/streamlit_app.py` +- `SPECS/entrypoint-cli.md` + - Tests: `tests/test_cli.py`, manual acceptance checks + - Implementation: `run.py` +- `SPECS/docker-optional.md` + - Tests: manual acceptance checks + - Implementation: `Dockerfile`, `docker-compose.yml` (optional) diff --git a/SPECS/streamlit-ui.md b/SPECS/streamlit-ui.md new file mode 100644 index 00000000..9804cef0 --- /dev/null +++ b/SPECS/streamlit-ui.md @@ -0,0 +1,64 @@ +# Feature Spec: Customer FAQ Assistant UI (Streamlit) + +## Goal +- Provide a simple local UI to interact with the Customer FAQ Assistant for manual testing and demonstration. + +## Scope +- In: + - Single-page Streamlit application. + - Question input and `top_k` control. + - Submit workflow that calls `POST /ask`. + - Rendering of answer, sources, and retrieval metadata. + - User-visible handling for API validation and runtime errors. +- Out: + - Authentication and user accounts. + - Multi-page navigation. + - Advanced visual design or theming system. + - Streaming token-by-token generation output. + +## Requirements +- UI MUST run locally with Streamlit and no API keys. +- UI MUST be a single-page interface. +- UI MUST NOT implement authentication, authorization, or user accounts. +- UI MUST include: + - Scrollable chat-style message window. + - Chat bubble layout with assistant messages left-aligned and user messages right-aligned. + - Initial assistant welcome message on load. + - Minimal DB status indicator showing only `built` and `doc_count` from `GET /db/status`. + - Build DB action button that calls `POST /db/build`. + - Question text input. + - `top_k` control constrained to `1..5` with default `3`, displayed inline with generator selection. + - Generator selector with `mock` (default) and `flan-t5` options, displayed inline with `top_k`. + - Three clickable example query buttons below the message input. + - `Clear Chat` button and `Submit` button positioned directly below message input. + - `Clear Chat` button visible only after at least one user message exists. + - Submit action to call API endpoint `POST /ask`. + - On submit, UI MUST immediately append the user message to the chat window and clear the input before backend processing completes. 
+- On success (`200`), UI MUST display: + - `answer` + - `sources` list with `title`, `snippet`, and `score` + - `retrieval.top_k` and `retrieval.matched` +- On fallback responses (`sources=[]`), UI MUST clearly indicate no matching sources were found. +- On API validation errors (`400`), UI MUST show clear, non-crashing feedback to user. +- If DB status is not built, question input and submit controls MUST be disabled until build succeeds. +- If `flan-t5` is selected and model assets are missing, UI MUST instruct the user to run `python run.py setup --with-llm`. +- UI MUST not require optional LLM mode; default mock mode must be fully supported. +- UI MUST not embed secrets or credentials in code. + +## Related Specifications +- `ask-endpoint-validation.md` - Defines validation rules that UI must handle +- `ask-response-contract.md` - Defines response schema that UI must render +- `generation-mock.md` - Default generator that UI relies on + +## Acceptance Criteria +- [X] User can enter a valid question, submit, and view answer output. (manual acceptance) +- [X] User can change `top_k` and see reflected retrieval metadata. (manual acceptance) +- [X] User can switch generator between `mock` and `flan-t5` from the same control row as `top_k`. (manual acceptance) +- [X] Source citations are rendered when present. (manual acceptance) +- [X] Fallback path is visible and understandable when no matches exist. (manual acceptance) +- [X] Validation errors are shown in the UI without app crash. (manual acceptance) +- [x] UI runs locally against the API in default mock mode. (test_streamlit_smoke.py::test_streamlit_app_module_imports) +- [x] DB status payload is normalized to `built` and `doc_count` for UI consumption. (tests/test_streamlit_ui_logic.py::test_get_db_status_normalizes_payload) +- [x] DB status helper surfaces HTTP failures without crashing the UI flow. (tests/test_streamlit_ui_logic.py::test_get_db_status_handles_non_200) +- [X] UI is implemented as a single page. (manual acceptance) +- [X] UI is accessible without authentication or account flows. (manual acceptance) diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/generation.py b/app/generation.py new file mode 100644 index 00000000..cdc2b3cf --- /dev/null +++ b/app/generation.py @@ -0,0 +1,156 @@ +import importlib.util + + +FALLBACK_ANSWER = ( + "I could not find a confident match in Mockridge Bank FAQs. " + "Please rephrase your question or contact support for help." +) + + +def _use_langchain_generation() -> bool: + return bool(importlib.util.find_spec("langchain_core")) and bool( + importlib.util.find_spec("langchain_huggingface") + ) + + +class MockGenerator: + def generate(self, question: str, sources: list[dict]) -> str: + if not sources: + return FALLBACK_ANSWER + + titles = ", ".join(source["title"] for source in sources[:2]) + primary = sources[0]["snippet"] + return ( + f"Based on Mockridge Bank FAQ ({titles}): " + f"{primary}" + ) + + +class FlanT5Generator: + def __init__(self) -> None: + self._llm = None + self._generator = None + + def _ensure_model(self) -> None: + if self._llm is not None or self._generator is not None: + return + + try: + from transformers import pipeline + + # Only download via explicit setup command; runtime should be local-only. + # Flan-T5 is a seq2seq model designed for instruction following. 
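            # pipeline() forwards model_kwargs to from_pretrained(), so
            # local_files_only=True makes a missing model raise immediately
            # (caught below) instead of triggering a download at request time.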
+ hf_pipeline = pipeline( + "text2text-generation", + model="google/flan-t5-small", + model_kwargs={"local_files_only": True}, + max_new_tokens=100, + do_sample=False, + num_return_sequences=1, + ) + if _use_langchain_generation(): + from langchain_huggingface import HuggingFacePipeline + + self._llm = HuggingFacePipeline(pipeline=hf_pipeline) + else: + self._generator = hf_pipeline + except Exception as exc: # pragma: no cover - depends on local model availability + raise RuntimeError( + "flan-t5-small model is unavailable locally. " + "Run `python run.py setup --with-llm` or use generator=mock." + ) from exc + + def _generate_with_langchain(self, question: str, sources: list[dict]) -> str: + from langchain_core.documents import Document + + from app.rag_chain import generate_answer as generate_rag_answer + + documents = [ + Document( + page_content=str(source.get("snippet", "")), + metadata={"title": str(source.get("title", "")), "id": str(source.get("id", ""))}, + ) + for source in sources[:3] + ] + raw_text = str(generate_rag_answer(question=question, documents=documents, llm=self._llm)).strip() + + # Flan-T5 produces cleaner output than distilgpt2, but still do basic cleanup + if raw_text: + # Take first substantial sentence/paragraph + first_part = raw_text.split('\n\n')[0].strip() + + # Basic quality check + if self._is_low_quality_output(first_part): + return "" + + return first_part + return "" + + def _is_low_quality_output(self, text: str) -> bool: + """Detect when LLM produces nonsensical output.""" + if not text or len(text) < 10: + return True + + # Check if output is mostly punctuation or special characters + alpha_chars = sum(c.isalpha() for c in text) + if alpha_chars < len(text) * 0.5: # Less than 50% letters + return True + + # Check for repetitive patterns (same word repeated 3+ times) + words = text.lower().split() + if len(words) > 2: + for i in range(len(words) - 2): + if words[i] == words[i + 1] == words[i + 2]: + return True + + return False + + def _generate_legacy(self, question: str, sources: list[dict]) -> str: + # Build context from top sources + context_parts = [source.get("snippet", "") for source in sources[:2]] + context = " ".join(context_parts) + + # Flan-T5 works better with clear instruction format + prompt = f"Answer the question based on the context.\n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:" + + outputs = self._generator(prompt) + raw_text = str(outputs[0].get("generated_text", "")).strip() if outputs else "" + + if raw_text: + # Take first substantial part + first_part = raw_text.split('\n\n')[0].strip() + + # Quality check + if self._is_low_quality_output(first_part): + return "" + + return first_part + return "" + + def generate(self, question: str, sources: list[dict]) -> str: + self._ensure_model() + + if not sources: + return FALLBACK_ANSWER + + text = ( + self._generate_with_langchain(question=question, sources=sources) + if self._llm is not None + else self._generate_legacy(question=question, sources=sources) + ) + if not text: + # LLM produced low-quality output; fall back with explanation + titles = ", ".join(source["title"] for source in sources[:2]) + return ( + f"Based on Mockridge Bank FAQ ({titles}), see sources below. " + f"(Note: LLM generation was unreliable for this query. 
" + f"Try 'mock' generator for consistent results.)" + ) + return text + + +def get_generator(mode: str | None): + choice = (mode or "mock").strip().lower() + if choice == "flan-t5": + return FlanT5Generator() + return MockGenerator() diff --git a/app/main.py b/app/main.py new file mode 100644 index 00000000..c88b14bd --- /dev/null +++ b/app/main.py @@ -0,0 +1,112 @@ +from fastapi import Body, FastAPI, HTTPException +from fastapi.responses import JSONResponse + +from app.generation import get_generator +from app.models import AskResponse +from app.retrieval import build_db, get_db_status, retrieve, to_source_payload + + +app = FastAPI(title="Customer FAQ Assistant API") + + +def _bad_request(detail: str) -> HTTPException: + return HTTPException(status_code=400, detail=detail) + + +def _validate_question(payload: dict) -> str: + if "question" not in payload: + raise _bad_request("question is required") + + question = payload.get("question") + if not isinstance(question, str): + raise _bad_request("question must be a string") + + stripped = question.strip() + if len(stripped) < 5: + raise _bad_request("question must be at least 5 characters") + if len(stripped) > 300: + raise _bad_request("question must be at most 300 characters") + return stripped + + +def _validate_top_k(payload: dict) -> int: + if "top_k" not in payload: + return 3 + + top_k = payload.get("top_k") + if top_k is None: + raise _bad_request("top_k cannot be null") + if isinstance(top_k, bool) or not isinstance(top_k, int): + raise _bad_request("top_k must be an integer") + if top_k < 1 or top_k > 5: + raise _bad_request("top_k must be between 1 and 5") + return top_k + + +def _validate_generator(payload: dict) -> str: + if "generator" not in payload: + return "mock" + + value = payload.get("generator") + if value is None: + raise _bad_request("generator cannot be null") + if not isinstance(value, str): + raise _bad_request("generator must be a string") + choice = value.strip().lower() + if choice not in {"mock", "flan-t5"}: + raise _bad_request("generator must be mock or flan-t5") + return choice + + +@app.exception_handler(HTTPException) +def http_exception_handler(_, exc: HTTPException): + return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) + + +@app.get("/health") +def health(): + return {"status": "ok"} + + +@app.post("/ask", response_model=AskResponse) +def ask(payload: dict = Body(...)): + if payload is None or not isinstance(payload, dict): + raise _bad_request("request body must be a JSON object") + + question = _validate_question(payload) + top_k = _validate_top_k(payload) + generator_mode = _validate_generator(payload) + + db_status = get_db_status() + if not db_status["built"]: + raise HTTPException(status_code=503, detail="Database not built. Run POST /db/build first.") + + matched_docs = retrieve(question=question, top_k=top_k) + sources = to_source_payload(matched_docs) + + try: + generator = get_generator(generator_mode) + answer = generator.generate(question=question, sources=sources) + except RuntimeError as exc: + # LLM generator may fail when model assets are not installed locally. 
+ raise HTTPException(status_code=503, detail=str(exc)) from exc + + response = { + "answer": answer, + "sources": sources, + "retrieval": { + "top_k": top_k, + "matched": len(sources), + }, + } + return response + + +@app.get("/db/status") +def db_status(): + return get_db_status() + + +@app.post("/db/build") +def db_build(): + return build_db() diff --git a/app/models.py b/app/models.py new file mode 100644 index 00000000..05d531bf --- /dev/null +++ b/app/models.py @@ -0,0 +1,19 @@ +from pydantic import BaseModel + + +class Source(BaseModel): + id: str + title: str + snippet: str + score: float + + +class RetrievalMeta(BaseModel): + top_k: int + matched: int + + +class AskResponse(BaseModel): + answer: str + sources: list[Source] + retrieval: RetrievalMeta diff --git a/app/rag_chain.py b/app/rag_chain.py new file mode 100644 index 00000000..fd959a74 --- /dev/null +++ b/app/rag_chain.py @@ -0,0 +1,30 @@ +from langchain_core.documents import Document +from langchain_core.output_parsers import StrOutputParser +from langchain_core.prompts import PromptTemplate + + +RAG_PROMPT = PromptTemplate.from_template( + ( + "Answer the customer's question about Mockridge Bank using only the provided context.\n\n" + "Context:\n{context}\n\n" + "Question: {question}\n\n" + "Answer:" + ) +) + + +def _build_context(documents: list[Document]) -> str: + chunks: list[str] = [] + for doc in documents: + title = str(doc.metadata.get("title", "")).strip() + if title: + chunks.append(f"{title}: {doc.page_content}") + else: + chunks.append(doc.page_content) + return "\n\n".join(chunks).strip() + + +def generate_answer(question: str, documents: list[Document], llm) -> str: + context = _build_context(documents) + chain = RAG_PROMPT | llm | StrOutputParser() + return chain.invoke({"question": question, "context": context}) diff --git a/app/retrieval.py b/app/retrieval.py new file mode 100644 index 00000000..dacbea17 --- /dev/null +++ b/app/retrieval.py @@ -0,0 +1,433 @@ +import importlib.util +import logging +import os +import re +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path +from typing import Any + +import chromadb +from chromadb.config import Settings + +# Chroma 0.6.x can emit noisy telemetry errors with newer posthog versions. +# Telemetry is already disabled in settings; this also silences those log lines. 
+logging.getLogger("chromadb.telemetry.product.posthog").disabled = True +logging.getLogger("posthog").disabled = True + + +FAQ_DIR = Path("data") +DEFAULT_MIN_SCORE = 0.25 +MODEL_NAME = "all-MiniLM-L6-v2" +CHROMA_DIR = Path("chroma") +COLLECTION_NAME = "mockridge_faq" +_LEXICAL_FALLBACK_READY = False +_STOPWORDS = { + "a", + "an", + "and", + "are", + "can", + "do", + "for", + "how", + "i", + "in", + "is", + "it", + "my", + "of", + "on", + "or", + "the", + "to", + "what", + "with", + "you", + "your", +} + + +def _report_build_status(message: str) -> None: + print(f"[db-build] {message}", flush=True) + + +@dataclass +class RetrievedDoc: + id: str + title: str + body: str + score: float + + +def _extract_field(text: str, field: str) -> str: + match = re.search(rf"(?im)^\s*{re.escape(field)}:\s*(.+)\s*$", text) + if not match: + return "" + return match.group(1).strip() + + +def _extract_body(text: str) -> str: + marker = "body: |" + idx = text.find(marker) + if idx == -1: + return "" + + body_lines = text[idx + len(marker) :].splitlines() + cleaned = [] + for line in body_lines: + if line.startswith(" "): + cleaned.append(line[2:]) + else: + cleaned.append(line.lstrip()) + return "\n".join(cleaned).strip() + + +def _snippet(body: str, limit: int = 220) -> str: + body = body.strip().replace("\n", " ") + if len(body) <= limit: + return body + return body[: limit - 3].rstrip() + "..." + + +def _tokenize(text: str) -> set[str]: + raw_tokens = re.findall(r"[a-z0-9]+", text.lower()) + normalized: set[str] = set() + for token in raw_tokens: + if len(token) > 3 and token.endswith("s"): + normalized.add(token[:-1]) + normalized.add(token) + return normalized + + +def _lexical_search(question: str, docs: list[dict[str, str]], top_k: int, min_score: float) -> list[RetrievedDoc]: + query_tokens = _tokenize(question) + if not query_tokens: + return [] + + matches: list[RetrievedDoc] = [] + key_query_tokens = {token for token in query_tokens if token not in _STOPWORDS} + key_query_size = max(1, len(key_query_tokens)) + for doc in docs: + title_tokens = _tokenize(doc["title"]) + body_tokens = _tokenize(doc["body"]) + doc_tokens = title_tokens | body_tokens + if not doc_tokens or not body_tokens: + continue + + overlap = len(query_tokens & doc_tokens) + if overlap == 0: + continue + + # Prioritize lexical coverage in title to keep obvious intent matches on top + # in offline fallback mode (e.g., "checking account" should rank checking docs). 
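        # Each coverage term below is a ratio in [0, 1]; min() caps the
        # weighted sum at 1.0 so fallback scores stay roughly comparable to
        # the cosine-similarity scores produced by the embedding path.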
+ title_overlap = len(query_tokens & title_tokens) + body_overlap = len(query_tokens & body_tokens) + title_coverage = title_overlap / len(query_tokens) + body_coverage = body_overlap / len(query_tokens) + token_density = overlap / max(1, len(doc_tokens)) + key_title_overlap = len(key_query_tokens & title_tokens) / key_query_size + key_body_overlap = len(key_query_tokens & body_tokens) / key_query_size + + score = min( + 1.0, + (0.42 * body_coverage) + + (0.30 * title_coverage) + + (0.18 * key_title_overlap) + + (0.10 * key_body_overlap) + + (0.05 * token_density * 10), + ) + if score < min_score: + continue + + matches.append( + RetrievedDoc( + id=doc["id"], + title=doc["title"], + body=doc["body"], + score=round(score, 6), + ) + ) + + matches.sort(key=lambda item: item.score, reverse=True) + return matches[:top_k] + + +def load_faq_docs() -> list[dict[str, str]]: + docs: list[dict[str, str]] = [] + if not FAQ_DIR.exists(): + return docs + + for path in sorted(FAQ_DIR.glob("*.md")): + text = path.read_text(encoding="utf-8").strip() + doc_id = _extract_field(text, "id") + title = _extract_field(text, "title") + body = _extract_body(text) + if doc_id and title and body: + docs.append({"id": doc_id, "title": title, "body": body}) + + return docs + + +def get_min_score() -> float: + raw = os.getenv("RAG_MIN_SCORE", str(DEFAULT_MIN_SCORE)) + try: + return float(raw) + except ValueError: + return DEFAULT_MIN_SCORE + + +@lru_cache(maxsize=1) +def _use_langchain_retrieval() -> bool: + return bool(importlib.util.find_spec("langchain_chroma")) and bool( + importlib.util.find_spec("langchain_huggingface") + ) + + +@lru_cache(maxsize=1) +def _get_client() -> chromadb.PersistentClient: + return chromadb.PersistentClient( + path=str(CHROMA_DIR), + settings=Settings(allow_reset=True, anonymized_telemetry=False), + ) + + +@lru_cache(maxsize=1) +def _get_collection() -> Any: + client = _get_client() + return client.get_or_create_collection( + name=COLLECTION_NAME, + metadata={"hnsw:space": "cosine"}, + ) + + +@lru_cache(maxsize=1) +def _get_legacy_model() -> Any: + from sentence_transformers import SentenceTransformer + + return SentenceTransformer(MODEL_NAME, local_files_only=True) + + +@lru_cache(maxsize=1) +def _get_langchain_embeddings() -> Any: + from langchain_huggingface import HuggingFaceEmbeddings + + return HuggingFaceEmbeddings( + model_name=f"sentence-transformers/{MODEL_NAME}", + model_kwargs={"local_files_only": True}, + encode_kwargs={"normalize_embeddings": True}, + ) + + +@lru_cache(maxsize=1) +def _get_langchain_vectorstore() -> Any: + from langchain_chroma import Chroma + + return Chroma( + client=_get_client(), + collection_name=COLLECTION_NAME, + embedding_function=_get_langchain_embeddings(), + collection_metadata={"hnsw:space": "cosine"}, + ) + + +def _ensure_indexed(collection: Any, docs: list[dict[str, str]]) -> None: + if not docs: + return + + existing_ids: set[str] = set() + try: + existing = collection.get() + existing_ids = set(existing.get("ids", [])) + except Exception: + pass + + new_docs = [doc for doc in docs if doc["id"] not in existing_ids] + if not new_docs: + return + + if _use_langchain_retrieval(): + vectorstore = _get_langchain_vectorstore() + vectorstore.add_texts( + texts=[doc["body"] for doc in new_docs], + ids=[doc["id"] for doc in new_docs], + metadatas=[{"title": doc["title"], "id": doc["id"]} for doc in new_docs], + ) + return + + model = _get_legacy_model() + texts = [f"{doc['title']}\n{doc['body']}" for doc in new_docs] + embeddings = model.encode(texts, 
normalize_embeddings=True).tolist() + collection.add( + ids=[doc["id"] for doc in new_docs], + documents=[doc["body"] for doc in new_docs], + metadatas=[{"title": doc["title"], "id": doc["id"]} for doc in new_docs], + embeddings=embeddings, + ) + + +def get_db_status() -> dict: + global _LEXICAL_FALLBACK_READY + + docs = load_faq_docs() + doc_count = len(docs) + + if _LEXICAL_FALLBACK_READY: + indexed_count = doc_count + built = doc_count > 0 + else: + try: + indexed_count = _get_collection().count() + except Exception: + _get_collection.cache_clear() + collection = _get_collection() + indexed_count = collection.count() + built = indexed_count >= doc_count and doc_count > 0 + + return { + "built": built, + "doc_count": doc_count, + "indexed_count": indexed_count, + "collection": COLLECTION_NAME, + } + + +def build_db() -> dict: + global _LEXICAL_FALLBACK_READY + + _report_build_status("Loading FAQ documents") + docs = load_faq_docs() + _report_build_status(f"Loaded {len(docs)} documents") + + # Full rebuild: reset the database to remove all collections and stale + # index files, then recreate from scratch. allow_reset=True is set + # in the client settings to permit this. + _report_build_status("Resetting existing collection") + client = _get_client() + client.reset() + _get_collection.cache_clear() + _get_langchain_vectorstore.cache_clear() + + try: + _report_build_status("Initializing collection") + collection = _get_collection() + if docs: + if _use_langchain_retrieval(): + _report_build_status("Indexing with LangChain Chroma embeddings") + vectorstore = _get_langchain_vectorstore() + vectorstore.add_texts( + texts=[doc["body"] for doc in docs], + ids=[doc["id"] for doc in docs], + metadatas=[{"title": doc["title"], "id": doc["id"]} for doc in docs], + ) + else: + _report_build_status("Indexing with SentenceTransformer embeddings") + model = _get_legacy_model() + texts = [f"{doc['title']}\n{doc['body']}" for doc in docs] + embeddings = model.encode(texts, normalize_embeddings=True).tolist() + collection.add( + ids=[doc["id"] for doc in docs], + documents=[doc["body"] for doc in docs], + metadatas=[{"title": doc["title"], "id": doc["id"]} for doc in docs], + embeddings=embeddings, + ) + + after = collection.count() + built = after == len(docs) and len(docs) > 0 + _LEXICAL_FALLBACK_READY = False + _report_build_status(f"Completed: indexed={after}, built={built}") + return { + "built": built, + "doc_count": len(docs), + "indexed_count": after, + "added": after, + "collection": COLLECTION_NAME, + } + except Exception: + # Offline-safe fallback for environments without local embedding assets. 
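        # Treat the raw corpus as the index: get_db_status() reports built=true
        # (when docs exist) and retrieve() will serve lexical matches for /ask.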
+ _LEXICAL_FALLBACK_READY = len(docs) > 0 + _report_build_status( + "Embedding index unavailable; using deterministic lexical fallback mode" + ) + return { + "built": _LEXICAL_FALLBACK_READY, + "doc_count": len(docs), + "indexed_count": len(docs), + "added": len(docs), + "collection": COLLECTION_NAME, + } + + +def retrieve(question: str, top_k: int) -> list[RetrievedDoc]: + global _LEXICAL_FALLBACK_READY + + docs = load_faq_docs() + if not docs: + return [] + + min_score = get_min_score() + collection = _get_collection() + try: + _ensure_indexed(collection, docs) + + matches: list[RetrievedDoc] = [] + if _use_langchain_retrieval(): + vectorstore = _get_langchain_vectorstore() + results = vectorstore.similarity_search_with_score(question, k=top_k) + for document, distance in results: + meta = document.metadata or {} + score = 1.0 - float(distance) + if score < min_score: + continue + matches.append( + RetrievedDoc( + id=str(meta.get("id", "")), + title=str(meta.get("title", "")), + body=str(document.page_content), + score=round(score, 6), + ) + ) + else: + model = _get_legacy_model() + query_embedding = model.encode([question], normalize_embeddings=True).tolist()[0] + result = collection.query( + query_embeddings=[query_embedding], + n_results=top_k, + include=["documents", "metadatas", "distances"], + ) + documents = result.get("documents", [[]])[0] + metadatas = result.get("metadatas", [[]])[0] + distances = result.get("distances", [[]])[0] + for doc_body, meta, distance in zip(documents, metadatas, distances): + score = 1.0 - float(distance) + if score < min_score: + continue + matches.append( + RetrievedDoc( + id=str(meta.get("id", "")), + title=str(meta.get("title", "")), + body=str(doc_body), + score=round(score, 6), + ) + ) + + matches.sort(key=lambda item: item.score, reverse=True) + _LEXICAL_FALLBACK_READY = False # clear fallback once embedding path succeeds + return matches[:top_k] + except Exception: + _LEXICAL_FALLBACK_READY = True + return _lexical_search(question=question, docs=docs, top_k=top_k, min_score=min_score) + + +def to_source_payload(docs: list[RetrievedDoc]) -> list[dict]: + payload = [] + for doc in docs: + payload.append( + { + "id": doc.id, + "title": doc.title, + "snippet": _snippet(doc.body), + "score": doc.score, + } + ) + return payload diff --git a/constitution.md b/constitution.md new file mode 100644 index 00000000..59051637 --- /dev/null +++ b/constitution.md @@ -0,0 +1,110 @@ +# Project Constitution — Customer FAQ Assistant (Mockridge Bank) + +## 1) Purpose and Scope +This constitution defines the engineering intent for this repository. +It is the decision baseline for humans and AI contributors. + +Source of truth order: +1. `SPECS/` (feature behavior and acceptance criteria) +2. `tests/` (executable verification) +3. `constitution.md` (engineering principles and quality gates) +4. implementation code + +If implementation changes behavior, specs/tests must be updated in the same change set. + +## 2) Product and Runtime Constraints +- Local-first system. No paid APIs or secrets required for default workflows. +- Python support range for project setup/runtime: `>=3.10`, `<3.13` (3.10/3.11/3.12). +- Default test workflow must run without network dependency on external hosted services. +- Optional LLM mode is allowed but must remain opt-in and non-blocking for tests. + +## 3) Core Architectural Intent + +### 3.1 Simplicity Over Cleverness +- Prefer direct, readable code paths over abstraction layers. 
+- Introduce abstraction only when at least two concrete use cases require it. +- Minimize hidden behavior and magic configuration. + +### 3.2 Modularity and Boundaries +- API layer (`app/main.py`) handles HTTP contracts and validation flow. +- Retrieval layer (`app/retrieval.py`) owns indexing/query logic and score filtering. +- Generation layer (`app/generation.py`, `app/rag_chain.py`) owns answer production. +- UI layer (`ui/streamlit_app.py`) owns presentation and user interaction only. +- Do not mix UI concerns into API/retrieval/generation modules. + +### 3.3 Determinism by Default +- Default generation mode must remain deterministic (`mock`). +- Retrieval must be deterministic for identical corpus and inputs. +- Optional stochastic behavior must be explicitly opt-in. + +### 3.4 Observability and Operability +- Every major workflow must be runnable from `run.py`. +- Required commands include setup, test, and matrix testing: + - `python run.py setup` + - `python run.py test` + - `python run.py test-matrix` +- Runtime errors must be actionable and user-readable. + +## 4) Dependency and Integration Policy +- Prefer stable, mainstream libraries with clear maintenance. +- LangChain integration is allowed for orchestration; local Chroma remains required for persistence. +- New dependencies must have a clear justification in the PR description. +- Avoid adding dependencies for trivial utility behavior. +- Tooling dependencies (e.g., `tox`, `nox`) are development concerns and should not be required for production runtime. + +## 5) Testing and Quality Standards + +### 5.1 Test Expectations +- Any behavior change must include or update tests. +- API contract fields and error semantics must remain backward-compatible unless a spec explicitly changes them. +- New features should include at least one integration-path verification. + +### 5.2 Quality Gates +- Must pass before merge: + - `python -m pytest -q` (or `python run.py test`) + - `python run.py test-matrix` for available local interpreters + - GitHub Actions CI workflow (Python 3.10/3.11/3.12) on the target branch/PR +- For behavior-affecting changes: + - relevant spec files in `SPECS/` updated + - traceability preserved (spec -> tests -> implementation) + +### 5.3 Error Handling +- Fail clearly with actionable messages. +- Avoid silent fallthroughs that hide failures. +- Keep fallback behavior explicit and deterministic when used. + +## 6) Naming and Code Conventions +- Use clear, domain-oriented names (`retrieve`, `build_db`, `get_db_status`). +- Avoid ambiguous abbreviations in public interfaces. +- Keep module responsibilities single-purpose and explicit. + +## 7) Change Management (Living Document) +- This file is version-controlled and must evolve with architectural decisions. +- Update this constitution when: + - supported Python range changes + - core architectural boundaries shift + - quality gates or required tooling change +- Constitution updates should explain intent, not low-level implementation detail. + +## 8) Collaboration Model +- Architecture decisions should be reviewable in writing (PR description/spec updates). +- Major direction changes should be agreed upon by project stakeholders (engineering + product owner). +- Do not rely on private chat context as the only rationale for repository-wide decisions.
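+
+For reference, the Section 5.2 quality gates map to this local command sequence (an illustration of the gates above, not a replacement for CI):
+
+```bash
+python -m pytest -q        # or: python run.py test
+python run.py test-matrix  # tox across available local interpreters
+```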
+ +## 9) Validation Checklists + +### 9.1 Before Implementation +- [ ] Relevant spec exists or is updated +- [ ] Module boundary for change is clear +- [ ] Dependency impact is understood + +### 9.2 Before Merge +- [ ] Tests pass locally (`pytest`) +- [ ] Matrix check run (`test-matrix`) for available interpreters +- [ ] Specs updated for behavior changes +- [ ] Error messages are actionable +- [ ] No secrets or external paid API requirements introduced + +### 9.3 After Merge (if applicable) +- [ ] Follow-up docs updated (`constitution.md`, `SPECS/`, `README.md`) +- [ ] Any deferred risks tracked explicitly diff --git a/data/auto_loans.md b/data/auto_loans.md new file mode 100644 index 00000000..b526d825 --- /dev/null +++ b/data/auto_loans.md @@ -0,0 +1,16 @@ +id: auto_loans +title: Mockridge Bank Auto Loans +body: | + Mockridge Bank offers auto loans for new and used vehicles, including purchases from dealers or private sellers. Loan terms typically range from 36 to 72 months. Rates and terms depend on creditworthiness, vehicle age, loan amount, and term length. + + Pre-approval: You can apply for pre-approval online to understand your budget before shopping. Pre-approval does not require you to select a vehicle and can help when negotiating with a dealer. Pre-approval decisions are based on the information you provide and a credit review. + + Payment options: Automatic payments are available and may reduce your APR. You can select a due date that aligns with your budget, and set up email or mobile alerts for upcoming payments. There are no prepayment penalties, so you can pay extra or pay off the loan early without fees. + + Required documentation: Typical documentation includes proof of identity, proof of income, and vehicle details once selected. For private-party purchases, additional documentation may be required. Mockridge Bank will provide a checklist during the application process. + + Refinancing: If you already have an auto loan, you may be eligible to refinance with Mockridge Bank. Refinancing can help reduce your monthly payment, lower your rate, or adjust your term length based on your goals. + + Insurance and collateral: Auto loans are secured by the vehicle, and comprehensive and collision insurance are typically required for the life of the loan. Mockridge Bank will confirm insurance coverage as part of the funding process. + + To get current rates and determine eligibility, apply online or speak with a Mockridge Bank loan officer. Terms and rates are subject to change based on market conditions and applicant qualifications. diff --git a/data/certificates_of_deposit.md b/data/certificates_of_deposit.md new file mode 100644 index 00000000..63a5308e --- /dev/null +++ b/data/certificates_of_deposit.md @@ -0,0 +1,14 @@ +id: certificates_of_deposit +title: Mockridge Bank Certificates of Deposit (CDs) +body: | + Mockridge Bank offers Certificates of Deposit (CDs) for customers who want a fixed rate and predictable return. CDs are available in a range of terms, typically from 6 months to 60 months. Longer terms generally offer higher rates. Once you open a CD, the rate is locked in for the full term. + + Minimum deposit: CDs require a minimum opening deposit, which may vary by term and promotional offers. A higher opening balance does not change the rate, but it increases the total interest earned over the term. + + Interest and payout: Interest is compounded and may be credited monthly, quarterly, or at maturity depending on the CD option you select. 
You can choose to have interest paid into a Mockridge Bank checking or savings account, or reinvested into the CD. + + Early withdrawal: CDs are intended to be held to maturity. Early withdrawals are subject to a penalty based on the term of the CD and the amount withdrawn. In some cases, the penalty may exceed the interest earned. + + Renewal: At maturity, you can withdraw funds, renew the CD for another term, or roll the balance into a new CD. Mockridge Bank will send a maturity notice with options and deadlines. + + CDs can be a good fit if you want stable returns and do not expect to need the funds during the term. For more flexibility, consider a savings or high-yield savings account. diff --git a/data/checking_accounts.md b/data/checking_accounts.md new file mode 100644 index 00000000..e48e6c5f --- /dev/null +++ b/data/checking_accounts.md @@ -0,0 +1,14 @@ +id: checking_accounts +title: Mockridge Bank Checking Accounts +body: | + Mockridge Bank offers two checking account options designed for everyday spending and simple money management: Everyday Checking and Premium Checking. Everyday Checking is intended for routine use such as bill pay, debit card purchases, and direct deposits. It includes a basic set of features without requiring a high balance. Premium Checking includes additional benefits for customers who keep larger balances or use more banking services. + + Fees and waivers: Everyday Checking has no monthly maintenance fee when you enroll in eStatements; otherwise a $5 monthly fee may apply. Premium Checking has a $15 monthly fee that can be waived with qualifying direct deposits or by maintaining a combined balance threshold across linked Mockridge Bank accounts. Fee waivers are evaluated each statement cycle based on your account activity. + + Included features: All checking accounts include a debit card, online banking, mobile check deposit, and access to the Mockridge Bank mobile app. You can set up automatic bill pay, recurring transfers, and low balance alerts. Both accounts support ATM withdrawals, and Premium Checking includes a higher allowance for out-of-network ATM fee reimbursements each month. + + Deposits and access: You can open a checking account online or at any Mockridge Bank branch with a valid ID and an initial deposit. Direct deposit is available for payroll and government benefits. Funds availability follows our standard deposit policy, which varies by deposit type and account history. + + Additional services: Overdraft coverage is optional and can be added or removed at any time. You can link a Mockridge Bank savings account to help cover overdrafts automatically. Wire transfers and cashier's checks are available for an additional fee. For customers who travel, Premium Checking includes waived incoming wire fees and enhanced fraud monitoring. + + If you are unsure which checking account is right for you, Mockridge Bank staff can review your typical monthly activity and recommend the best fit based on fees, balance patterns, and desired features. diff --git a/data/credit_cards.md b/data/credit_cards.md new file mode 100644 index 00000000..391ea328 --- /dev/null +++ b/data/credit_cards.md @@ -0,0 +1,16 @@ +id: credit_cards +title: Mockridge Bank Credit Cards +body: | + Mockridge Bank offers two primary credit card options: the Cash Rewards Card and the Travel Rewards Card. Both cards include fraud monitoring, mobile alerts, and online account management through the Mockridge Bank mobile app and online banking. 
+ + Cash Rewards Card: Earn 1.5% cash back on all purchases with no rotating categories. Cash back can be redeemed as a statement credit or deposited into a Mockridge Bank checking or savings account. There is no annual fee for the Cash Rewards Card. + + Travel Rewards Card: Earn points on travel and dining purchases and standard points on all other categories. Points can be redeemed for travel statement credits, gift cards, or merchandise. The Travel Rewards Card has no foreign transaction fees and includes travel assistance features such as emergency card replacement. + + Rates and limits: APRs and credit limits are based on creditworthiness. Introductory promotions may be available periodically, and details are provided at application time. You can view your current APR and minimum payment requirements in your account dashboard. + + Payment options: You can set up automatic payments for the minimum due, a fixed amount, or the full statement balance. Alerts can be configured for payment due dates and large transactions. + + Security: If you suspect fraud, you can lock your card instantly in the mobile app and contact Mockridge Bank support. Replacement cards can be ordered through the app or by phone. + + To apply, submit a secure online application or visit a Mockridge Bank branch. Approval decisions are based on credit history, income, and other eligibility factors. diff --git a/data/fraud_disputes.md b/data/fraud_disputes.md new file mode 100644 index 00000000..30b775cb --- /dev/null +++ b/data/fraud_disputes.md @@ -0,0 +1,14 @@ +id: fraud_disputes +title: Mockridge Bank Fraud and Disputes +body: | + If you believe a transaction is unauthorized, contact Mockridge Bank immediately through the mobile app or by calling support. You can lock your card in the app while the dispute is reviewed. Most disputes are investigated within 10 business days, and you may be asked to provide documentation or a written statement. + + Reporting steps: Start by reviewing the transaction details in your account history. If the transaction looks unfamiliar, use the in-app dispute feature or contact support by phone. Be prepared to confirm recent account activity and verify your identity. + + Investigation process: After a dispute is opened, Mockridge Bank reviews transaction data, merchant information, and any details you provide. Some cases are resolved quickly, while others may require additional research. You can monitor dispute status in online banking or the mobile app. + + Provisional credit: Depending on the circumstances, provisional credit may be issued while the investigation is ongoing. If the dispute is resolved in your favor, the credit becomes permanent. If the dispute is denied, the provisional credit may be reversed. + + Card controls: You can lock or unlock your card at any time. If you suspect your card was compromised, request a replacement immediately. Card replacement and expedited shipping options are available. + + Tips for prevention: Enable transaction alerts, keep your contact details current, and review your account regularly. Mockridge Bank will never ask for your full PIN or password by email or text. diff --git a/data/mobile_app.md b/data/mobile_app.md new file mode 100644 index 00000000..f10f31bd --- /dev/null +++ b/data/mobile_app.md @@ -0,0 +1,14 @@ +id: mobile_app +title: Mockridge Bank Mobile App +body: | + The Mockridge Bank mobile app provides secure access to your accounts and helps you manage daily banking tasks. 
You can view balances, review transaction history, transfer funds, deposit checks, and manage your debit or credit cards. + + Mobile check deposit: Eligible accounts can deposit checks by taking a photo. Daily and monthly deposit limits apply and vary by account history. Funds availability depends on the type of deposit and your account standing. + + Alerts and controls: Set up alerts for low balances, large transactions, and payment due dates. You can lock or unlock your card instantly, report a lost card, or request a replacement. Notifications can be delivered by push, email, or SMS. + + Payments and transfers: The app supports internal transfers between Mockridge Bank accounts and external transfers to linked banks. You can schedule recurring transfers and bill payments from within the app. + + Security: The app supports Face ID and Touch ID on compatible devices. Sessions time out automatically after inactivity. For account protection, keep your app up to date and avoid sharing your credentials. + + Availability: The Mockridge Bank mobile app is available for iOS and Android. Download links are provided on the Mockridge Bank website and in online banking. diff --git a/data/overdraft_fees.md b/data/overdraft_fees.md new file mode 100644 index 00000000..a926032d --- /dev/null +++ b/data/overdraft_fees.md @@ -0,0 +1,14 @@ +id: overdraft_fees +title: Mockridge Bank Overdraft Fees +body: | + Mockridge Bank charges a $35 overdraft fee when a transaction is approved without sufficient funds. We do not charge an overdraft fee for transactions that are declined. Overdraft fees are limited to a maximum of three per business day. + + Coverage options: You can opt in or out of overdraft coverage for debit card and ATM transactions. This setting can be managed in online banking, the mobile app, or by contacting support. Checks and recurring payments may still be covered depending on your account settings. + + Overdraft protection: Linking a Mockridge Bank savings account for automatic transfers can help prevent overdrafts. When enabled, funds are transferred from your linked savings account to cover transactions. Transfer amounts and limits are described in your account disclosures. + + Grace and alerts: Mockridge Bank provides balance alerts to help you avoid overdrafts. You can configure alerts for low balances, large transactions, or daily balance updates. If an overdraft occurs, you will see a notification in the mobile app and online banking. + + How to avoid overdrafts: Keep a buffer in your checking account, enable alerts, and consider linking savings. You can also review pending transactions in the app to understand what is scheduled to post. + + If you have questions about overdraft fees or want to adjust coverage, contact Mockridge Bank support or visit a branch for assistance. diff --git a/data/savings_accounts.md b/data/savings_accounts.md new file mode 100644 index 00000000..7f02db5d --- /dev/null +++ b/data/savings_accounts.md @@ -0,0 +1,16 @@ +id: savings_accounts +title: Mockridge Bank Savings Accounts +body: | + Mockridge Bank savings accounts are designed for short-term and long-term goals, such as building an emergency fund, saving for a purchase, or setting aside funds for travel. We offer a Standard Savings account and a High-Yield Savings account. Both accounts are insured and are accessible through online banking and the Mockridge Bank mobile app. + + Standard Savings features: Standard Savings offers a low minimum balance and straightforward access. 
It is a good option for customers who want basic savings without complex tiered rates. The account includes automatic transfers from checking and goal-based savings tools in the app. + + High-Yield Savings features: High-Yield Savings provides tiered interest rates based on balance. Higher balances qualify for higher rates. Interest is compounded daily and paid monthly. The account is suitable for customers who keep larger balances and want a better return. + + Fees and minimums: There is no monthly maintenance fee if the minimum daily balance is $200 or more; otherwise a $4 fee may apply. There is no fee to transfer funds between your Mockridge Bank checking and savings accounts when initiated through online or mobile banking. + + Access and transfers: You can link your savings account to your Mockridge Bank checking account for easy transfers. Transfers can be scheduled or automatic. If you need to move money between accounts frequently, a linked checking account offers the fastest access. + + Opening an account: You can open a savings account online or at a branch with a valid ID and an initial deposit. If you already have a checking account, you can open and link savings in minutes within online banking. + + If you are deciding between Standard and High-Yield Savings, consider your typical balance and savings timeline. Mockridge Bank staff can help you compare rates and account features so you can choose the best fit. diff --git a/data/support_hours.md b/data/support_hours.md new file mode 100644 index 00000000..100ff156 --- /dev/null +++ b/data/support_hours.md @@ -0,0 +1,12 @@ +id: support_hours +title: Mockridge Bank Support Hours +body: | + Mockridge Bank customer support is available Monday through Friday from 8:00 AM to 8:00 PM, and Saturday from 9:00 AM to 2:00 PM local time. Sunday support is closed. You can reach support by phone, secure message in online banking, or chat within the mobile app during business hours. + + Contact channels: Phone support is best for urgent issues such as suspected fraud, locked accounts, or card replacement. Secure messaging is ideal for routine account questions and documentation requests. In-app chat is available for general assistance and navigation help. + + Branch support: Branch locations follow local hours and may vary by location. Holiday hours are published in advance on the Mockridge Bank website and on branch signage. + + Accessibility: Mockridge Bank offers additional assistance for customers who need accommodations. Please mention any accessibility needs when contacting support so we can route you to the appropriate team. + + Emergency guidance: If you suspect fraud outside normal hours, use the mobile app to lock your card immediately and submit a dispute. You can also leave a secure message for follow-up. diff --git a/data/wire_transfers.md b/data/wire_transfers.md new file mode 100644 index 00000000..a8698cc8 --- /dev/null +++ b/data/wire_transfers.md @@ -0,0 +1,14 @@ +id: wire_transfers +title: Mockridge Bank Wire Transfers +body: | + Mockridge Bank provides domestic and international wire transfer services for customers who need to move funds quickly. Wire transfers are typically same-day for domestic requests submitted before the cutoff time, and international wires may take 1 to 3 business days depending on destination. + + How to request: You can submit a wire request through a branch, by calling customer support, or via secure message in online banking (eligibility may vary by account type). 
You will need the recipient's name, bank name, routing number or SWIFT code, account number, and transfer amount. + + Fees and limits: Wire transfer fees depend on whether the wire is domestic or international and whether it is incoming or outgoing. Limits may apply based on account tenure and verification requirements. Fees and limits are disclosed at the time of request. + + Security and verification: For your protection, Mockridge Bank may require additional verification before processing a wire. This can include callback verification, document review, or in-branch confirmation. + + Cutoff times: Domestic wires typically require submission before the daily cutoff time to process same-day. International wires have earlier cutoff times due to intermediary banking timelines. Exact times are available through customer support. + + If you need frequent transfers, a Mockridge Bank representative can help you set up templates and verify beneficiary details to reduce errors and processing delays. diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..ec53200e --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,23 @@ +services: + api: + build: . + ports: + - "8000:8000" + environment: + - RAG_GENERATOR=mock + - RAG_MIN_SCORE=0.25 + volumes: + - ./data:/app/data + - ./chroma:/app/chroma + ui: + build: . + working_dir: /app + volumes: + - ./ui:/app/ui + command: ["python", "-m", "streamlit", "run", "ui/streamlit_app.py", "--server.port", "8501", "--server.address", "0.0.0.0"] + ports: + - "8501:8501" + environment: + - API_URL=http://api:8000 + depends_on: + - api diff --git a/images/streamlit_ui.png b/images/streamlit_ui.png new file mode 100644 index 00000000..c961906d Binary files /dev/null and b/images/streamlit_ui.png differ diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 00000000..2b6d94ef --- /dev/null +++ b/noxfile.py @@ -0,0 +1,7 @@ +import nox + + +@nox.session(python=["3.10", "3.11", "3.12"]) +def tests(session: nox.Session) -> None: + session.install("-r", "requirements.txt") + session.run("python", "-m", "pytest", "-q") diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..8430b516 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,17 @@ +[pytest] +testpaths = tests +python_files = test_*.py +addopts = -ra +filterwarnings = + ignore:Accessing the 'model_fields' attribute on the instance is deprecated.*:pydantic.warnings.PydanticDeprecatedSince211:chromadb\.types + ignore:builtin type SwigPyPacked has no __module__ attribute:DeprecationWarning + ignore:builtin type SwigPyObject has no __module__ attribute:DeprecationWarning + ignore:builtin type swigvarlink has no __module__ attribute:DeprecationWarning +markers = + smoke: quick smoke checks for module startup/import paths + unit: unit tests for isolated component behavior + integration: integration tests requiring multiple components + requires_data: tests that depend on FAQ corpus data + optional: optional tests that don't block CI (e.g., LLM mode) + contract: API contract validation tests + validation: input validation and error handling tests diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..8c7117c2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,23 @@ +# Core API +fastapi>=0.115,<1.0 +uvicorn[standard]>=0.30,<1.0 +pydantic>=2.8,<3.0 + +# RAG components +chromadb>=0.5,<1.0 +sentence-transformers>=3.0,<4.0 +transformers>=4.44,<5.0 +--extra-index-url https://download.pytorch.org/whl/cpu +torch>=2.3,<3.0 +sentencepiece>=0.2,<0.3
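+ +# LangChain integration (orchestration over the local Chroma store)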
+langchain>=0.3,<0.4 +langchain-core>=0.3,<0.4 +langchain-huggingface>=0.1,<0.2 +langchain-chroma>=0.1,<0.2 + +# UI +streamlit>=1.37,<2.0 +requests>=2.32,<3.0 + +# Testing +pytest>=8.2,<9.0 diff --git a/run.py b/run.py new file mode 100644 index 00000000..a1584dcb --- /dev/null +++ b/run.py @@ -0,0 +1,499 @@ +#!/usr/bin/env python3 +import os +import shutil +import subprocess +import sys +import time +from pathlib import Path + + +PROJECT_ROOT = Path(__file__).resolve().parent +VENV_DIR = PROJECT_ROOT / ".venv" +SUPPORTED_MIN = (3, 10) +SUPPORTED_MAX_EXCLUSIVE = (3, 13) + + +def _venv_python() -> str: + if sys.platform.startswith("win"): + return str(VENV_DIR / "Scripts" / "python.exe") + return str(VENV_DIR / "bin" / "python") + + +def _run(cmd: list[str], cwd: Path | None = None, check: bool = False, env: dict | None = None) -> int: + proc = subprocess.run(cmd, cwd=cwd or PROJECT_ROOT, env=env) + if check and proc.returncode != 0: + raise SystemExit(proc.returncode) + return proc.returncode + + +def _is_supported_version(version: tuple[int, int]) -> bool: + return SUPPORTED_MIN <= version < SUPPORTED_MAX_EXCLUSIVE + + +def _version_text(version: tuple[int, int]) -> str: + return f"{version[0]}.{version[1]}" + + +def _probe_python_version(cmd_prefix: list[str]) -> tuple[int, int] | None: + try: + proc = subprocess.run( + cmd_prefix + ["-c", "import sys; print(f'{sys.version_info[0]}.{sys.version_info[1]}')"], + capture_output=True, + text=True, + timeout=10, + ) + except (OSError, subprocess.SubprocessError): + return None + if proc.returncode != 0: + return None + raw = proc.stdout.strip() + parts = raw.split(".") + if len(parts) != 2: + return None + try: + return int(parts[0]), int(parts[1]) + except ValueError: + return None + + +def _candidate_python_commands() -> list[list[str]]: + candidates: list[list[str]] = [] + if sys.platform.startswith("win"): + if shutil.which("py"): + candidates.extend([["py", "-3.12"], ["py", "-3.11"], ["py", "-3.10"]]) + if shutil.which("python"): + candidates.append(["python"]) + return candidates + + for command in ["python3.12", "python3.11", "python3.10", "python3", "python"]: + if shutil.which(command): + candidates.append([command]) + return candidates + + +def _select_supported_python_command(override: str | None = None) -> list[str] | None: + if override: + override_cmd = [override] + version = _probe_python_version(override_cmd) + if version is None: + print(f"Could not execute Python override: {override}") + return None + if not _is_supported_version(version): + print( + "Unsupported Python override version: " + f"{_version_text(version)}. Supported range is >=3.10 and <3.13." 
+ ) + return None + return override_cmd + + for cmd in _candidate_python_commands(): + version = _probe_python_version(cmd) + if version and _is_supported_version(version): + print(f"Using Python {_version_text(version)} via: {' '.join(cmd)}") + return cmd + return None + + +def _attempt_python_install() -> int: + print("Attempting to install Python 3.12 with an available package manager...") + installers: list[list[str]] = [] + + if sys.platform.startswith("win"): + if shutil.which("winget"): + installers.append(["winget", "install", "-e", "--id", "Python.Python.3.12"]) + if shutil.which("choco"): + installers.append(["choco", "install", "python312", "-y"]) + elif sys.platform == "darwin": + if shutil.which("brew"): + installers.append(["brew", "install", "python@3.12"]) + else: + if shutil.which("apt-get"): + installers.append(["apt-get", "update"]) + installers.append(["apt-get", "install", "-y", "python3.12", "python3.12-venv"]) + elif shutil.which("dnf"): + installers.append(["dnf", "install", "-y", "python3.12"]) + elif shutil.which("yum"): + installers.append(["yum", "install", "-y", "python3.12"]) + + # Fallback attempts for package managers that expose only 3.11 packages. + if not installers: + if sys.platform.startswith("win") and shutil.which("choco"): + installers.append(["choco", "install", "python311", "-y"]) + elif sys.platform == "darwin" and shutil.which("brew"): + installers.append(["brew", "install", "python@3.11"]) + elif shutil.which("apt-get"): + installers.append(["apt-get", "update"]) + installers.append(["apt-get", "install", "-y", "python3.11", "python3.11-venv"]) + elif shutil.which("dnf"): + installers.append(["dnf", "install", "-y", "python3.11"]) + elif shutil.which("yum"): + installers.append(["yum", "install", "-y", "python3.11"]) + + if not installers: + print("No supported package manager detected for automatic Python install.") + return 2 + + for command in installers: + print(f"Running: {' '.join(command)}") + exit_code = _run(command) + if exit_code != 0: + print("Install command failed.") + return exit_code + return 0 + + +def _ensure_venv(python_cmd: list[str]) -> str: + if not VENV_DIR.exists(): + print("Creating virtual environment at .venv") + _run(python_cmd + ["-m", "venv", str(VENV_DIR)], check=True) + return _venv_python() + + +def _has_docker() -> bool: + return shutil.which("docker") is not None + + +def _download_embedding_model(python_bin: str) -> int: + print("Downloading embedding model (all-MiniLM-L6-v2)...") + + # Suppress harmless HuggingFace warnings (symlink cache on Windows, hf_xet). 
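+ # HF_HUB_DISABLE_SYMLINKS_WARNING=1 only quiets the warning text; models are still downloaded and cached (Windows may duplicate files instead of symlinking).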
+ hf_env = {**os.environ, "HF_HUB_DISABLE_SYMLINKS_WARNING": "1"} + + # First, download with network access + result = _run( + [ + python_bin, + "-c", + ( + "import warnings; warnings.filterwarnings('ignore'); " + "from langchain_huggingface import HuggingFaceEmbeddings; " + "print('Downloading model...'); " + "embeddings = HuggingFaceEmbeddings(" + "model_name='sentence-transformers/all-MiniLM-L6-v2', " + "model_kwargs={'local_files_only': False}" + "); " + "embeddings.embed_query('test'); " + "print('Download complete')" + ), + ], + env=hf_env, + ) + + if result != 0: + return result + + # Verify it can be loaded with local_files_only=True (same as retrieval code) + print("Verifying model is cached for offline use...") + return _run( + [ + python_bin, + "-c", + ( + "import warnings; warnings.filterwarnings('ignore'); " + "from langchain_huggingface import HuggingFaceEmbeddings; " + "embeddings = HuggingFaceEmbeddings(" + "model_name='sentence-transformers/all-MiniLM-L6-v2', " + "model_kwargs={'local_files_only': True}, " + "encode_kwargs={'normalize_embeddings': True}" + "); " + "embeddings.embed_query('verification test'); " + "print('Embedding model verified and ready for offline use')" + ), + ], + env=hf_env, + ) + + +def _download_llm_assets(python_bin: str) -> int: + print("Downloading flan-t5-small model assets...") + hf_env = {**os.environ, "HF_HUB_DISABLE_SYMLINKS_WARNING": "1"} + return _run( + [ + python_bin, + "-c", + ( + "import warnings; warnings.filterwarnings('ignore'); " + "from transformers import pipeline; " + "pipeline('text2text-generation', model='google/flan-t5-small')" + ), + ], + env=hf_env, + ) + + +def _build_retrieval_db(python_bin: str) -> int: + print("Building retrieval database...") + return _run( + [ + python_bin, + "-c", + ( + "import json; " + "from app.retrieval import build_db; " + "print(json.dumps(build_db()))" + ), + ] + ) + + +def cmd_setup(args: list[str]) -> int: + if "--help" in args: + print("Usage: python run.py setup [--no-venv] [--with-llm] [--python <path>] [--install-python]") + return 0 + + use_venv = "--no-venv" not in args + install_python = "--install-python" in args + + python_override: str | None = None + for idx, arg in enumerate(args): + if arg.startswith("--python="): + python_override = arg.split("=", 1)[1].strip() + elif arg == "--python" and idx + 1 < len(args): + python_override = args[idx + 1].strip() + + if use_venv: + selected_python = _select_supported_python_command(override=python_override) + if selected_python is None and install_python: + install_result = _attempt_python_install() + if install_result != 0: + print("Automatic Python install failed.") + return install_result + selected_python = _select_supported_python_command(override=python_override) + + if selected_python is None: + print( + "No supported Python interpreter found for venv creation.\n" + "Supported range is >=3.10 and <3.13.\n" + "Install Python 3.10, 3.11, or 3.12, then rerun setup.\n" + "Optional: run 'python run.py setup --install-python' to attempt automated install." + ) + return 2 + python_bin = _ensure_venv(selected_python) + else: + current_version = sys.version_info[:2] + if not _is_supported_version(current_version): + print( + "Warning: Current Python " + f"{_version_text(current_version)} is outside the recommended range " + "(>=3.10 and <3.13). Some dependencies may fail."
+ ) + python_bin = sys.executable + + print("Installing dependencies from requirements.txt") + result = _run([python_bin, "-m", "pip", "install", "-r", "requirements.txt"]) + if result != 0: + return result + + result = _download_embedding_model(python_bin) + if result != 0: + print("Failed to download embedding model during setup.") + return result + + result = _build_retrieval_db(python_bin) + if result != 0: + print("Failed to build retrieval database during setup.") + return result + + if "--with-llm" in args: + return _download_llm_assets(python_bin) + + return 0 + + +def cmd_api(args: list[str]) -> int: + if "--help" in args: + print("Usage: python run.py api") + return 0 + + python_bin = _venv_python() if VENV_DIR.exists() else sys.executable + return _run([python_bin, "-m", "uvicorn", "app.main:app", "--reload", "--port", "8000"]) + + +def cmd_ui(args: list[str]) -> int: + if "--help" in args: + print("Usage: python run.py ui") + return 0 + + python_bin = _venv_python() if VENV_DIR.exists() else sys.executable + return _run([python_bin, "-m", "streamlit", "run", "ui/streamlit_app.py", "--server.port", "8501"]) + + +def cmd_test(args: list[str]) -> int: + if "--help" in args: + print("Usage: python run.py test") + return 0 + + python_bin = _venv_python() if VENV_DIR.exists() else sys.executable + print("Running pytest") + if os.getenv("PYTEST_CURRENT_TEST"): + # Prevent recursive pytest invocation when run.py is tested by pytest. + print("Detected pytest context; skipping nested pytest execution.") + return 0 + return _run([python_bin, "-m", "pytest", "-q"]) + + +def cmd_test_matrix(args: list[str]) -> int: + if "--help" in args: + print("Usage: python run.py test-matrix [-- <tox args>]") + return 0 + + python_bin = _venv_python() if VENV_DIR.exists() else sys.executable + if os.getenv("PYTEST_CURRENT_TEST"): + print("Detected pytest context; skipping nested tox execution.") + return 0 + + has_tox_in_python = _run( + [python_bin, "-c", "import importlib.util, sys; sys.exit(0 if importlib.util.find_spec('tox') else 1)"] + ) == 0 + + print("Running tox test matrix") + if has_tox_in_python: + return _run([python_bin, "-m", "tox", *args]) + + if shutil.which("tox"): + return _run(["tox", *args]) + + print( + "tox is not installed in the active environment.\n" + "Install it with one of:\n" + f" {python_bin} -m pip install tox\n" + " python -m pip install tox" + ) + return 2 + + +def cmd_fullstack(args: list[str]) -> int: + if "--help" in args: + print("Usage: python run.py fullstack") + return 0 + + python_bin = _venv_python() if VENV_DIR.exists() else sys.executable + processes: list[subprocess.Popen] = [] + try: + api_proc = subprocess.Popen( + [python_bin, "-m", "uvicorn", "app.main:app", "--reload", "--port", "8000"], + cwd=PROJECT_ROOT, + ) + processes.append(api_proc) + time.sleep(1.5) + + ui_proc = subprocess.Popen( + [python_bin, "-m", "streamlit", "run", "ui/streamlit_app.py", "--server.port", "8501"], + cwd=PROJECT_ROOT, + ) + processes.append(ui_proc) + print("API: http://127.0.0.1:8000") + print("UI: http://127.0.0.1:8501") + + ui_proc.wait() + return ui_proc.returncode or 0 + except KeyboardInterrupt: + return 0 + finally: + for proc in processes: + if proc.poll() is None: + proc.terminate() + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + proc.kill() + + +def _docker_unavailable() -> int: + print("Docker is not installed or not on PATH.
Use local commands instead (setup/api/ui/fullstack/test).") + return 2 + + +def cmd_docker_build(args: list[str]) -> int: + if "--help" in args: + print("Usage: python run.py docker-build") + return 0 + if not _has_docker(): + return _docker_unavailable() + return _run(["docker", "compose", "build"]) + + +def cmd_docker_api(args: list[str]) -> int: + if "--help" in args: + print("Usage: python run.py docker-api") + return 0 + if not _has_docker(): + return _docker_unavailable() + return _run(["docker", "compose", "up", "api"]) + + +def cmd_docker_fullstack(args: list[str]) -> int: + if "--help" in args: + print("Usage: python run.py docker-fullstack") + return 0 + if not _has_docker(): + return _docker_unavailable() + return _run(["docker", "compose", "up", "api", "ui"]) + + +def cmd_docker_down(args: list[str]) -> int: + if "--help" in args: + print("Usage: python run.py docker-down") + return 0 + if not _has_docker(): + return _docker_unavailable() + return _run(["docker", "compose", "down"]) + + +def show_help() -> int: + print( + """ +Customer FAQ Assistant - Commands + + python run.py setup [--no-venv] [--with-llm] [--python <path>] [--install-python] + Install dependencies + python run.py api Start FastAPI backend + python run.py ui Start Streamlit UI + python run.py fullstack Start API + UI together + python run.py test Run pytest + python run.py test-matrix Run tox matrix (py310/py311/py312) + python run.py help Show this help + +Optional Docker helpers: + python run.py docker-build + python run.py docker-api + python run.py docker-fullstack + python run.py docker-down +""".strip() + ) + return 0 + + +def main() -> int: + os.chdir(PROJECT_ROOT) + + if len(sys.argv) < 2: + return show_help() + + command = sys.argv[1].lower() + args = sys.argv[2:] + + commands = { + "setup": cmd_setup, + "api": cmd_api, + "ui": cmd_ui, + "fullstack": cmd_fullstack, + "test": cmd_test, + "test-matrix": cmd_test_matrix, + "help": lambda _args: show_help(), + "docker-build": cmd_docker_build, + "docker-api": cmd_docker_api, + "docker-fullstack": cmd_docker_fullstack, + "docker-down": cmd_docker_down, + } + + if command not in commands: + print(f"Unknown command: {command}") + show_help() + return 1 + + return commands[command](args) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..c31bb72b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,54 @@ +import sys +from pathlib import Path + +import pytest +from fastapi.testclient import TestClient + +# Ensure project root is importable for app/ and ui/ modules. +PROJECT_ROOT = Path(__file__).resolve().parent.parent +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + + +@pytest.fixture(autouse=True) +def default_generator_env(monkeypatch: pytest.MonkeyPatch): + """ + Force deterministic generator mode for all tests unless explicitly overridden. + + This fixture forces mock generator usage for deterministic tests. + + Spec: generation-mock.md, generation-optional-llm.md + Requirement: "Default generator MUST be selected when no generator is specified or when `generator=mock`" + """ + monkeypatch.setenv("RAG_GENERATOR", "mock") + + +@pytest.fixture() +def client() -> TestClient: + """ + Provide a FastAPI TestClient for making HTTP requests to the API in tests. + + The app is imported lazily to provide clear failure messages if app.main + doesn't exist yet. This fixture is used by all API integration tests.
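+ Before returning, the fixture checks GET /db/status and triggers POST /db/build when needed, so /ask tests never depend on a prior manual build step.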
+ + Returns: + TestClient: Configured test client for the FastAPI application + + Spec: health-endpoint.md, ask-endpoint-validation.md, ask-response-contract.md + Note: Used by all endpoint tests to interact with the API + """ + try: + from app.main import app + except Exception as exc: # pragma: no cover - explicit failure path for missing app + pytest.fail(f"Could not import `app.main.app`: {exc}") + + client_instance = TestClient(app) + + # Ensure retrieval DB is built once tests start hitting /ask endpoints. + status_resp = client_instance.get("/db/status") + if status_resp.status_code == 200 and not status_resp.json().get("built", False): + build_resp = client_instance.post("/db/build") + if build_resp.status_code != 200: + pytest.fail(f"Could not build retrieval DB for tests: {build_resp.text}") + + return client_instance diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 00000000..c672fca6 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,274 @@ +import os +import subprocess +import sys +from pathlib import Path + +import pytest + + +PROJECT_ROOT = Path(__file__).parent.parent +RUN_PY = PROJECT_ROOT / "run.py" + + +@pytest.mark.unit +def test_run_py_exists(): + """ + Verify that run.py file exists in the project root. + + Spec: entrypoint-cli.md + Requirement: "A `run.py` script that manages setup, API, UI, and test commands" + """ + assert RUN_PY.exists(), f"Expected run.py at {RUN_PY}" + assert RUN_PY.is_file() + + +@pytest.mark.integration +def test_run_py_help_command_prints_usage(): + """ + Verify that 'python run.py help' prints usage and available commands. + + Spec: entrypoint-cli.md + Acceptance Criteria: "`python run.py help` prints usage and available commands" + """ + if not RUN_PY.exists(): + pytest.skip("run.py not yet implemented") + + result = subprocess.run( + [sys.executable, str(RUN_PY), "help"], + capture_output=True, + text=True, + timeout=10, + ) + + assert result.returncode == 0, f"Expected exit code 0, got {result.returncode}" + output = result.stdout.lower() + + # Verify that help output mentions the required commands + assert "setup" in output, "Help should mention 'setup' command" + assert "api" in output, "Help should mention 'api' command" + assert "ui" in output, "Help should mention 'ui' command" + assert "fullstack" in output, "Help should mention 'fullstack' command" + assert "test" in output, "Help should mention 'test' command" + assert "help" in output, "Help should mention 'help' command" + + +@pytest.mark.integration
def test_run_py_setup_help_includes_python_selection_flags(): + """ + Verify that setup help includes interpreter-selection and install flags. + + Spec: entrypoint-cli.md + Requirement: "`run.py setup` supports interpreter override and optional install flow" + """ + if not RUN_PY.exists(): + pytest.skip("run.py not yet implemented") + + result = subprocess.run( + [sys.executable, str(RUN_PY), "setup", "--help"], + capture_output=True, + text=True, + timeout=10, + ) + + assert result.returncode == 0 + output = result.stdout.lower() + assert "--python" in output + assert "--install-python" in output + + +@pytest.mark.integration +def test_run_py_test_command_executes_pytest(): + """ + Verify that 'python run.py test' runs pytest successfully.
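+ + Under an outer pytest run (as here), run.py detects PYTEST_CURRENT_TEST and prints a notice instead of nesting pytest; the assertion below accepts that path.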
+ + Spec: entrypoint-cli.md + Acceptance Criteria: "`python run.py test` runs pytest successfully" + """ + if not RUN_PY.exists(): + pytest.skip("run.py not yet implemented") + + env = dict(os.environ) + # Prevent nested pytest execution when running under pytest. + env["PYTEST_CURRENT_TEST"] = "1" + result = subprocess.run( + [sys.executable, str(RUN_PY), "test"], + capture_output=True, + text=True, + timeout=120, + cwd=PROJECT_ROOT, + env=env, + ) + + # Test command should execute pytest + # It may pass or fail, but should invoke pytest + output = result.stdout + result.stderr + assert "pytest" in output.lower() or "test" in output.lower() or "detected pytest context" in output.lower(), \ + "Test command should invoke pytest" + + +@pytest.mark.integration +def test_run_py_invalid_command_fails_clearly(): + """ + Verify that run.py fails clearly with actionable error for invalid commands. + + Spec: entrypoint-cli.md + Requirement: "Commands MUST print clear status messages and fail clearly with actionable errors" + """ + if not RUN_PY.exists(): + pytest.skip("run.py not yet implemented") + + result = subprocess.run( + [sys.executable, str(RUN_PY), "invalid_command_xyz"], + capture_output=True, + text=True, + timeout=10, + ) + + assert result.returncode != 0, "Invalid command should return non-zero exit code" + output = result.stdout + result.stderr + assert len(output) > 0, "Error message should be printed for invalid command" + + +@pytest.mark.integration +def test_run_py_no_arguments_shows_help(): + """ + Verify that running 'python run.py' without arguments shows help or usage. + + Spec: entrypoint-cli.md + Requirement: "Clear help output with available commands" + """ + if not RUN_PY.exists(): + pytest.skip("run.py not yet implemented") + + result = subprocess.run( + [sys.executable, str(RUN_PY)], + capture_output=True, + text=True, + timeout=10, + ) + + # Should either show help or fail with usage message + output = result.stdout + result.stderr + assert len(output) > 0, "Should print usage or help when no arguments provided" + + +@pytest.mark.integration +def test_run_py_recognizes_setup_command(): + """ + Verify that run.py recognizes 'setup' command. + + Spec: entrypoint-cli.md + Requirement: "`run.py` MUST support these commands: `setup`" + Note: This test verifies command recognition, not full execution + """ + if not RUN_PY.exists(): + pytest.skip("run.py not yet implemented") + + # Using --help flag if available, or checking error message doesn't say "unknown command" + result = subprocess.run( + [sys.executable, str(RUN_PY), "setup", "--help"], + capture_output=True, + text=True, + timeout=10, + ) + + # If --help isn't supported, command should still be recognized + output = result.stdout + result.stderr + assert "unknown" not in output.lower() or result.returncode == 0, \ + "Setup command should be recognized" + + +@pytest.mark.integration +def test_run_py_recognizes_api_command(): + """ + Verify that run.py recognizes 'api' command. 
+ + Spec: entrypoint-cli.md + Requirement: "`run.py` MUST support these commands: `api`" + Note: This test verifies command recognition, not full execution + """ + if not RUN_PY.exists(): + pytest.skip("run.py not yet implemented") + + result = subprocess.run( + [sys.executable, str(RUN_PY), "api", "--help"], + capture_output=True, + text=True, + timeout=10, + ) + + output = result.stdout + result.stderr + assert "unknown" not in output.lower() or result.returncode == 0, \ + "API command should be recognized" + + +@pytest.mark.integration +def test_run_py_recognizes_ui_command(): + """ + Verify that run.py recognizes 'ui' command. + + Spec: entrypoint-cli.md + Requirement: "`run.py` MUST support these commands: `ui`" + Note: This test verifies command recognition, not full execution + """ + if not RUN_PY.exists(): + pytest.skip("run.py not yet implemented") + + result = subprocess.run( + [sys.executable, str(RUN_PY), "ui", "--help"], + capture_output=True, + text=True, + timeout=10, + ) + + output = result.stdout + result.stderr + assert "unknown" not in output.lower() or result.returncode == 0, \ + "UI command should be recognized" + + +@pytest.mark.integration +def test_run_py_recognizes_fullstack_command(): + """ + Verify that run.py recognizes 'fullstack' command. + + Spec: entrypoint-cli.md + Requirement: "`run.py` MUST support these commands: `fullstack`" + Note: This test verifies command recognition, not full execution + """ + if not RUN_PY.exists(): + pytest.skip("run.py not yet implemented") + + result = subprocess.run( + [sys.executable, str(RUN_PY), "fullstack", "--help"], + capture_output=True, + text=True, + timeout=10, + ) + + output = result.stdout + result.stderr + assert "unknown" not in output.lower() or result.returncode == 0, \ + "Fullstack command should be recognized" + + +@pytest.mark.integration +def test_run_py_is_executable_with_python(): + """ + Verify that run.py can be executed with sys.executable. + + Spec: entrypoint-cli.md + Requirement: "`run.py` MUST be cross-platform and use `sys.executable` for subprocess calls" + """ + if not RUN_PY.exists(): + pytest.skip("run.py not yet implemented") + + result = subprocess.run( + [sys.executable, str(RUN_PY), "help"], + capture_output=True, + text=True, + timeout=10, + ) + + # Should execute without import errors or syntax errors + assert "SyntaxError" not in result.stderr, "run.py should not have syntax errors" + assert "ImportError" not in result.stderr or result.returncode == 0, \ + "run.py should handle imports gracefully" diff --git a/tests/test_contract.py b/tests/test_contract.py new file mode 100644 index 00000000..52b2a718 --- /dev/null +++ b/tests/test_contract.py @@ -0,0 +1,109 @@ +import pytest + + +@pytest.mark.contract +@pytest.mark.integration +def test_ask_response_contract_contains_required_top_level_keys(client): + """ + Verify that POST /ask response contains required top-level keys and types. 
+ + Spec: ask-response-contract.md + Acceptance Criteria: "Contract test validates required top-level keys exist on every 200 response" + """ + payload = {"question": "What are your savings account options?", "top_k": 3} + response = client.post("/ask", json=payload) + + assert response.status_code == 200 + body = response.json() + + assert "answer" in body + assert "sources" in body + assert "retrieval" in body + + assert isinstance(body["answer"], str) + assert isinstance(body["sources"], list) + assert isinstance(body["retrieval"], dict) + assert body["answer"].strip() != "" + + +@pytest.mark.contract +@pytest.mark.integration +@pytest.mark.requires_data +def test_ask_response_sources_have_required_fields_when_present(client): + """ + Verify that source items in response contain all required fields with correct types. + + Spec: ask-response-contract.md + Acceptance Criteria: "Contract test validates source item field presence and scalar types" + "Contract test validates `answer`, `id`, `title`, and `snippet` are non-empty strings" + """ + payload = {"question": "Tell me about checking accounts", "top_k": 3} + response = client.post("/ask", json=payload) + + assert response.status_code == 200 + body = response.json() + + for source in body["sources"]: + assert "id" in source + assert "title" in source + assert "snippet" in source + assert "score" in source + + assert isinstance(source["id"], str) + assert isinstance(source["title"], str) + assert isinstance(source["snippet"], str) + assert isinstance(source["score"], (int, float)) + assert source["id"].strip() != "" + assert source["title"].strip() != "" + assert source["snippet"].strip() != "" + + +@pytest.mark.contract +@pytest.mark.integration +def test_ask_response_retrieval_metadata_has_required_fields(client): + """ + Verify that retrieval metadata contains required fields and matches payload. + + Spec: ask-response-contract.md + Acceptance Criteria: "Retrieval metadata fields are always present and consistent with payload" + """ + payload = {"question": "Do you offer auto loans?", "top_k": 2} + response = client.post("/ask", json=payload) + + assert response.status_code == 200 + body = response.json() + + retrieval = body["retrieval"] + assert "top_k" in retrieval + assert "matched" in retrieval + assert isinstance(retrieval["top_k"], int) + assert isinstance(retrieval["matched"], int) + assert retrieval["top_k"] == 2 + assert retrieval["matched"] == len(body["sources"]) + + +@pytest.mark.contract +@pytest.mark.integration +def test_ask_fallback_keeps_stable_schema(client): + """ + Verify that fallback response (no matches) preserves full schema contract. 
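+ + The question below is deliberate nonsense so that no FAQ document clears the RAG_MIN_SCORE retrieval threshold, forcing the empty-sources fallback path.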
+ + Spec: ask-response-contract.md + Acceptance Criteria: "Fallback path preserves full schema and uses empty `sources`" + """ + payload = { + "question": "zxqyqv synthetic non banking phrase no match please", + "top_k": 3, + } + response = client.post("/ask", json=payload) + + assert response.status_code == 200 + body = response.json() + + assert set(body.keys()) == {"answer", "sources", "retrieval"} + assert isinstance(body["answer"], str) + assert body["answer"].strip() != "" + assert body["sources"] == [] + assert isinstance(body["retrieval"], dict) + assert body["retrieval"]["top_k"] == 3 + assert body["retrieval"]["matched"] == 0 diff --git a/tests/test_data_loader.py b/tests/test_data_loader.py new file mode 100644 index 00000000..f2182055 --- /dev/null +++ b/tests/test_data_loader.py @@ -0,0 +1,99 @@ +import json +import re +from pathlib import Path + +import pytest + + +FAQ_DIR = Path("data") + + +def _extract_markdown_fields(path: Path) -> dict: + text = path.read_text(encoding="utf-8").strip() + id_match = re.search(r"(?im)^\s*id:\s*(.+)\s*$", text) + title_match = re.search(r"(?im)^\s*title:\s*(.+)\s*$", text) + doc_id = id_match.group(1).strip() if id_match else "" + title = title_match.group(1).strip() if title_match else "" + body = text + return {"id": doc_id, "title": title, "body": body} + + +def _extract_json_fields(path: Path) -> dict: + payload = json.loads(path.read_text(encoding="utf-8")) + return { + "id": str(payload.get("id", "")).strip(), + "title": str(payload.get("title", "")).strip(), + "body": str(payload.get("body", "")).strip(), + } + + +def _load_docs(): + files = list(FAQ_DIR.glob("*.md")) + list(FAQ_DIR.glob("*.json")) + docs = [] + for file_path in files: + if file_path.suffix == ".md": + docs.append(_extract_markdown_fields(file_path)) + else: + docs.append(_extract_json_fields(file_path)) + return docs + + +@pytest.mark.unit +def test_faq_directory_exists(): + """ + Verify that the FAQ corpus directory exists at data/. + + Spec: faq-data.md + Requirement: "Corpus MUST be local and committed to repository" + """ + assert FAQ_DIR.exists(), "Expected FAQ directory at data" + assert FAQ_DIR.is_dir() + + +@pytest.mark.unit +@pytest.mark.requires_data +def test_faq_corpus_size_within_expected_range(): + """ + Verify that FAQ corpus contains between 8 and 15 documents. + + Spec: faq-data.md + Requirement: "Corpus MUST contain between 8 and 15 documents" + Acceptance Criteria: "Corpus size is within defined range (8-15)" + """ + docs = _load_docs() + assert 8 <= len(docs) <= 15 + + +@pytest.mark.unit +@pytest.mark.requires_data +def test_faq_docs_have_required_fields_and_non_empty_values(): + """ + Verify that all FAQ documents have required fields (id, title, body) with non-empty values. + + Spec: faq-data.md + Requirement: "Each document MUST include: `id`, `title`, `body`" + Acceptance Criteria: "Data loader can parse all corpus files without runtime errors" + """ + docs = _load_docs() + assert docs, "No FAQ docs found in data" + + for doc in docs: + assert set(doc.keys()) == {"id", "title", "body"} + assert isinstance(doc["id"], str) and doc["id"] != "" + assert isinstance(doc["title"], str) and doc["title"] != "" + assert isinstance(doc["body"], str) and doc["body"] != "" + + +@pytest.mark.unit +@pytest.mark.requires_data +def test_faq_document_ids_are_unique(): + """ + Verify that all FAQ document IDs are unique across the corpus. 
+ + Spec: faq-data.md + Requirement: "IDs MUST be unique across corpus" + Acceptance Criteria: "Document IDs are unique and non-empty" + """ + docs = _load_docs() + ids = [doc["id"] for doc in docs] + assert len(ids) == len(set(ids)) diff --git a/tests/test_db.py b/tests/test_db.py new file mode 100644 index 00000000..aa481405 --- /dev/null +++ b/tests/test_db.py @@ -0,0 +1,24 @@ +import pytest + + +@pytest.mark.integration +def test_db_status_endpoint_returns_expected_keys(client): + response = client.get("/db/status") + assert response.status_code == 200 + body = response.json() + + assert "built" in body + assert "doc_count" in body + assert "indexed_count" in body + assert "collection" in body + + +@pytest.mark.integration +def test_db_build_endpoint_builds_or_confirms_build(client): + response = client.post("/db/build") + assert response.status_code == 200 + body = response.json() + + assert body["doc_count"] > 0 + assert body["indexed_count"] >= body["doc_count"] + assert body["built"] is True diff --git a/tests/test_determinism.py b/tests/test_determinism.py new file mode 100644 index 00000000..54258ec0 --- /dev/null +++ b/tests/test_determinism.py @@ -0,0 +1,19 @@ +import pytest + + +@pytest.mark.integration +def test_same_input_produces_same_output(client): + """ + Verify that repeated identical requests produce identical answers (determinism). + + Spec: generation-mock.md + Acceptance Criteria: "Repeated identical requests produce identical answers" + """ + payload = {"question": "Do you support fraud disputes in the mobile app?", "top_k": 3} + + first = client.post("/ask", json=payload) + second = client.post("/ask", json=payload) + + assert first.status_code == 200 + assert second.status_code == 200 + assert first.json() == second.json() diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py new file mode 100644 index 00000000..9bfa2c29 --- /dev/null +++ b/tests/test_end_to_end.py @@ -0,0 +1,53 @@ +import pytest + + +@pytest.mark.integration +def test_known_question_returns_expected_doc_and_answer(client): + """ + Verify that a known question returns the expected top document and a non-empty answer. + + Spec: retrieval-pipeline.md, generation-mock.md, ask-response-contract.md + Acceptance: known query -> top doc + answer + retrieval metadata + """ + payload = {"question": "What are your checking account monthly fees?", "top_k": 3} + response = client.post("/ask", json=payload) + + assert response.status_code == 200 + body = response.json() + + assert body["sources"], "Expected at least one source for known question" + top_source = body["sources"][0] + assert top_source["id"] == "checking_accounts" + assert top_source["title"].lower().startswith("mockridge bank checking") + assert isinstance(body["answer"], str) and body["answer"].strip() != "" + assert body["retrieval"]["matched"] >= 1 + + +@pytest.mark.integration +def test_pipeline_happy_path_returns_sorted_sources_and_metadata(client): + """ + Verify end-to-end pipeline returns sorted sources, consistent metadata, and non-empty answer. 
+ + Spec: retrieval-pipeline.md, ask-response-contract.md, generation-mock.md + Acceptance: sorted scores, matched count, non-empty answer + """ + payload = {"question": "Tell me about overdraft coverage and fees", "top_k": 2} + response = client.post("/ask", json=payload) + + assert response.status_code == 200 + body = response.json() + + assert body["retrieval"]["top_k"] == 2 + assert body["retrieval"]["matched"] == len(body["sources"]) + + if len(body["sources"]) > 1: + scores = [s["score"] for s in body["sources"]] + assert scores == sorted(scores, reverse=True) + + for src in body["sources"]: + assert isinstance(src["id"], str) and src["id"] != "" + assert isinstance(src["title"], str) and src["title"] != "" + assert isinstance(src["snippet"], str) and src["snippet"] != "" + assert isinstance(src["score"], (int, float)) + + assert isinstance(body["answer"], str) and body["answer"].strip() != "" diff --git a/tests/test_generator_config.py b/tests/test_generator_config.py new file mode 100644 index 00000000..4cb50754 --- /dev/null +++ b/tests/test_generator_config.py @@ -0,0 +1,42 @@ +import pytest + + +@pytest.mark.integration +def test_default_generator_mode_is_mock_deterministic(client): + """ + Verify that default generator mode uses mock and produces deterministic output. + + Spec: generation-optional-llm.md + Acceptance Criteria: "With default environment, app uses mock generator" + """ + payload = {"question": "How can I contact support?", "top_k": 3} + first = client.post("/ask", json=payload) + second = client.post("/ask", json=payload) + + assert first.status_code == 200 + assert second.status_code == 200 + assert first.json() == second.json() + + +@pytest.mark.optional +@pytest.mark.integration +def test_flan_t5_mode_is_opt_in_and_fails_clearly_when_unavailable(client): + """ + Verify that flan-t5 mode routes through LLM adapter or fails clearly if unavailable. + + Spec: generation-optional-llm.md + Acceptance Criteria: "With `generator=flan-t5`, app routes generation through LLM adapter" + """ + payload = { + "question": "What credit card options do you have?", + "top_k": 3, + "generator": "flan-t5", + } + response = client.post("/ask", json=payload) + + # Accept success (if model is available) or explicit runtime failure. + assert response.status_code in {200, 500, 503} + if response.status_code in {500, 503}: + body = response.json() + as_text = str(body).lower() + assert "flan-t5" in as_text or "model" in as_text diff --git a/tests/test_health.py b/tests/test_health.py new file mode 100644 index 00000000..5c031ee5 --- /dev/null +++ b/tests/test_health.py @@ -0,0 +1,17 @@ +import pytest + + +@pytest.mark.integration +def test_health_returns_ok(client): + """ + Verify that GET /health returns 200 with {"status": "ok"} JSON response. + + Spec: health-endpoint.md + Acceptance Criteria: "Calling `GET /health` returns status code `200`" + "Response JSON includes key `status` with value `ok`" + """ + response = client.get("/health") + + assert response.status_code == 200 + assert response.json() == {"status": "ok"} + assert "application/json" in response.headers.get("content-type", "") diff --git a/tests/test_retrieval.py b/tests/test_retrieval.py new file mode 100644 index 00000000..c1710a76 --- /dev/null +++ b/tests/test_retrieval.py @@ -0,0 +1,87 @@ +import pytest + + +@pytest.mark.integration +@pytest.mark.requires_data +def test_known_query_returns_expected_top_document(client): + """ + Verify that a known banking query retrieves the expected FAQ document. 
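+    The checking-account question serves as the canonical known query: the corpus
+    ships a `checking_accounts` document that is expected to win the ranking.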
+ + Spec: retrieval-pipeline.md + Acceptance Criteria: "A known banking query retrieves an expected FAQ document as top result" + """ + payload = {"question": "What are your checking account monthly fees?", "top_k": 3} + response = client.post("/ask", json=payload) + + assert response.status_code == 200 + body = response.json() + assert body["retrieval"]["matched"] >= 1 + assert len(body["sources"]) >= 1 + + top_source = body["sources"][0] + combined = f"{top_source['title']} {top_source['snippet']}".lower() + assert "checking" in combined or "checking_accounts" in top_source.get("id", "").lower() + + +@pytest.mark.integration +@pytest.mark.requires_data +def test_top_k_controls_maximum_number_of_sources(client): + """ + Verify that changing top_k parameter controls the maximum returned source count. + + Spec: retrieval-pipeline.md + Acceptance Criteria: "Changing `top_k` changes the maximum returned source count accordingly" + """ + payload = {"question": "Tell me about bank account options", "top_k": 1} + response = client.post("/ask", json=payload) + + assert response.status_code == 200 + body = response.json() + assert len(body["sources"]) <= 1 + assert body["retrieval"]["top_k"] == 1 + assert body["retrieval"]["matched"] == len(body["sources"]) + + +@pytest.mark.integration +@pytest.mark.requires_data +def test_sources_are_sorted_by_descending_score(client): + """ + Verify that source list is sorted by relevance score in descending order. + + Spec: retrieval-pipeline.md + Acceptance Criteria: "Source list is sorted by score descending" + """ + payload = {"question": "How do overdraft fees work?", "top_k": 5} + response = client.post("/ask", json=payload) + + assert response.status_code == 200 + body = response.json() + if len(body["sources"]) < 2: + assert body["retrieval"]["matched"] == len(body["sources"]) + return + scores = [source["score"] for source in body["sources"]] + assert scores == sorted(scores, reverse=True) + + +@pytest.mark.integration +def test_unknown_query_returns_fallback_with_empty_sources(client): + """ + Verify that out-of-domain query triggers unmatched retrieval (fallback) path. + + Spec: retrieval-pipeline.md + Acceptance Criteria: "Unknown/out-of-domain query triggers unmatched retrieval path" + """ + payload = { + "question": "quartz nebula hedgehog protocol 91821 unrelated", + "top_k": 3, + } + response = client.post("/ask", json=payload) + + assert response.status_code == 200 + body = response.json() + + assert isinstance(body["answer"], str) + assert body["answer"].strip() != "" + assert body["sources"] == [] + assert body["retrieval"]["top_k"] == 3 + assert body["retrieval"]["matched"] == 0 diff --git a/tests/test_streamlit_smoke.py b/tests/test_streamlit_smoke.py new file mode 100644 index 00000000..f87d818b --- /dev/null +++ b/tests/test_streamlit_smoke.py @@ -0,0 +1,17 @@ +import importlib + +import pytest + + +@pytest.mark.smoke +def test_streamlit_app_module_imports(): + """ + Verify that the Streamlit UI module can be imported without errors. + + Spec: streamlit-ui.md + Acceptance Criteria: "UI runs locally against the API in default mock mode" + Note: This is a smoke test to ensure the UI module exists and has no import errors. 
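+    To run only the smoke tests (assuming the `smoke` marker is registered in
+    the pytest configuration): `python -m pytest -m smoke -q`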
+ """ + pytest.importorskip("streamlit") + module = importlib.import_module("ui.streamlit_app") + assert module is not None diff --git a/tests/test_streamlit_ui_logic.py b/tests/test_streamlit_ui_logic.py new file mode 100644 index 00000000..df1f50c6 --- /dev/null +++ b/tests/test_streamlit_ui_logic.py @@ -0,0 +1,53 @@ +import importlib + +import pytest + + +@pytest.mark.unit +def test_get_db_status_normalizes_payload(monkeypatch): + """ + Verify DB status helper returns normalized minimal payload for the UI. + + Spec: streamlit-ui.md + Acceptance Criteria: "DB status payload is normalized to `built` and `doc_count` for UI consumption" + """ + pytest.importorskip("streamlit") + module = importlib.import_module("ui.streamlit_app") + + class _FakeResponse: + status_code = 200 + + @staticmethod + def json(): + return {"built": True, "doc_count": 11, "indexed_count": 11, "collection": "faq"} + + monkeypatch.setattr(module.requests, "get", lambda *args, **kwargs: _FakeResponse()) + + built, payload, error = module._get_db_status() + + assert built is True + assert payload == {"built": True, "doc_count": 11} + assert error == "" + + +@pytest.mark.unit +def test_get_db_status_handles_non_200(monkeypatch): + """ + Verify DB status helper reports HTTP failures as a non-empty error string. + + Spec: streamlit-ui.md + Acceptance Criteria: "DB status helper surfaces HTTP failures without crashing the UI flow" + """ + pytest.importorskip("streamlit") + module = importlib.import_module("ui.streamlit_app") + + class _FakeResponse: + status_code = 503 + + monkeypatch.setattr(module.requests, "get", lambda *args, **kwargs: _FakeResponse()) + + built, payload, error = module._get_db_status() + + assert built is False + assert payload == {} + assert "DB status request failed (503)." in error diff --git a/tests/test_validation.py b/tests/test_validation.py new file mode 100644 index 00000000..d9f5a508 --- /dev/null +++ b/tests/test_validation.py @@ -0,0 +1,284 @@ +import pytest + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_missing_question_returns_400(client): + """ + Verify that POST /ask returns 400 when question field is missing. + + Spec: ask-endpoint-validation.md + Acceptance Criteria: "Missing `question` returns `400`" + """ + response = client.post("/ask", json={}) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_question_too_short_returns_400(client): + """ + Verify that POST /ask returns 400 when question is shorter than 5 characters. + + Spec: ask-endpoint-validation.md + Acceptance Criteria: "`question` shorter than 5 characters returns `400`" + """ + payload = {"question": "hey", "top_k": 3} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_question_too_long_returns_400(client): + """ + Verify that POST /ask returns 400 when question is longer than 300 characters. 
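+    Boundary behavior: exactly 300 characters is accepted (see the max-length
+    test below); 301 characters must be rejected.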
+ + Spec: ask-endpoint-validation.md + Acceptance Criteria: "`question` longer than 300 characters returns `400`" + """ + payload = {"question": "x" * 301, "top_k": 3} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_top_k_zero_returns_400(client): + """ + Verify that POST /ask returns 400 when top_k is 0. + + Spec: ask-endpoint-validation.md + Acceptance Criteria: "`top_k = 0` returns `400`" + """ + payload = {"question": "What are your overdraft fees?", "top_k": 0} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_top_k_above_range_returns_400(client): + """ + Verify that POST /ask returns 400 when top_k is above the valid range (>5). + + Spec: ask-endpoint-validation.md + Acceptance Criteria: "`top_k > 5` returns `400`" + """ + payload = {"question": "What are your overdraft fees?", "top_k": 6} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_omitted_top_k_uses_default(client): + """ + Verify that POST /ask accepts omitted top_k and defaults to 3. + + Spec: ask-endpoint-validation.md + Acceptance Criteria: "Omitted `top_k` is accepted and treated as `3`" + """ + payload = {"question": "What are your overdraft fees?"} + response = client.post("/ask", json=payload) + assert response.status_code == 200 + assert response.json()["retrieval"]["top_k"] == 3 + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_question_min_length_is_allowed(client): + """ + Verify that POST /ask accepts a question at minimum length (5 characters). + + Spec: ask-endpoint-validation.md + Requirement: "`question` is required string with length `5..300`" + """ + payload = {"question": "abcde", "top_k": 3} + response = client.post("/ask", json=payload) + assert response.status_code == 200 + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_question_max_length_is_allowed(client): + """ + Verify that POST /ask accepts a question at maximum length (300 characters). + + Spec: ask-endpoint-validation.md + Requirement: "`question` is required string with length `5..300`" + """ + payload = {"question": "x" * 300, "top_k": 3} + response = client.post("/ask", json=payload) + assert response.status_code == 200 + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_top_k_negative_returns_400(client): + """ + Verify that POST /ask returns 400 when top_k is negative. 
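+    Together with the 0 and >5 cases above, this pins down the contract that any
+    value outside 1..5 yields a 400 rather than a silent clamp to the default.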
+ + Spec: ask-endpoint-validation.md + Requirement: "`top_k` is optional integer with default `3` and valid range `1..5`" + """ + payload = {"question": "What are your overdraft fees?", "top_k": -1} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_top_k_non_int_returns_400(client): + """ + Verify that POST /ask returns 400 when top_k is not an integer. + + Spec: ask-endpoint-validation.md + Requirement: "`top_k` is optional integer with default `3` and valid range `1..5`" + """ + payload = {"question": "What are your overdraft fees?", "top_k": "three"} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_empty_string_question_returns_400(client): + """ + Verify that POST /ask returns 400 when question is an empty string. + + Spec: ask-endpoint-validation.md + Requirement: "`question` is required string with length `5..300`" + Edge case: Empty string is different from missing field + """ + payload = {"question": "", "top_k": 3} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_whitespace_only_question_returns_400(client): + """ + Verify that POST /ask returns 400 when question contains only whitespace. + + Spec: ask-endpoint-validation.md + Requirement: "`question` is required string with length `5..300`" + Edge case: Whitespace-only strings should not be accepted + """ + payload = {"question": " ", "top_k": 3} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_null_question_returns_400(client): + """ + Verify that POST /ask returns 400 when question is null/None. + + Spec: ask-endpoint-validation.md + Requirement: "`question` is required string with length `5..300`" + Edge case: Null values should be rejected + """ + payload = {"question": None, "top_k": 3} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_top_k_float_returns_400(client): + """ + Verify that POST /ask returns 400 when top_k is a float. 
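+    Guards against lenient numeric coercion: 2.5 must be rejected outright, not
+    truncated to an in-range integer.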
+ + Spec: ask-endpoint-validation.md + Requirement: "`top_k` is optional integer with default `3` and valid range `1..5`" + Edge case: Float values like 2.5 should be rejected + """ + payload = {"question": "What are your overdraft fees?", "top_k": 2.5} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_top_k_null_returns_400(client): + """ + Verify that POST /ask returns 400 when top_k is explicitly null/None. + + Spec: ask-endpoint-validation.md + Requirement: "`top_k` is optional integer with default `3` and valid range `1..5`" + Edge case: Explicit null is different from omitted field and should be rejected + """ + payload = {"question": "What are your overdraft fees?", "top_k": None} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_question_with_special_characters_is_allowed(client): + """ + Verify that POST /ask accepts questions with special characters. + + Spec: ask-endpoint-validation.md + Edge case: Special characters should be allowed as long as length is valid + """ + payload = {"question": "What's the APR% for credit cards?", "top_k": 3} + response = client.post("/ask", json=payload) + assert response.status_code == 200 + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_question_with_unicode_is_allowed(client): + """ + Verify that POST /ask accepts questions with unicode characters. + + Spec: ask-endpoint-validation.md + Edge case: Unicode characters should be supported + """ + payload = {"question": "¿Cuáles son las tarifas de sobregiro?", "top_k": 3} + response = client.post("/ask", json=payload) + assert response.status_code == 200 + + +@pytest.mark.validation +@pytest.mark.integration +def test_ask_generator_invalid_returns_400(client): + """ + Verify that POST /ask returns 400 when generator is not an allowed value. 
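+    Only `mock` and `flan-t5` are permitted, so an arbitrary value such as
+    `gpt4` must produce a validation error.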
+ + Spec: ask-endpoint-validation.md + Requirement: "`generator` is optional string with allowed values `mock` or `flan-t5`" + """ + payload = {"question": "What are your overdraft fees?", "top_k": 3, "generator": "gpt4"} + response = client.post("/ask", json=payload) + assert response.status_code == 400 + assert response.headers.get("content-type", "").startswith("application/json") + assert "detail" in response.json() diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..b0e85329 --- /dev/null +++ b/tox.ini @@ -0,0 +1,13 @@ +[tox] +envlist = py310, py311, py312 +skip_missing_interpreters = true +isolated_build = false + +[testenv] +description = Run pytest for {envname} +setenv = + PIP_EXTRA_INDEX_URL = https://download.pytorch.org/whl/cpu +deps = + -rrequirements.txt +commands = + python -m pytest -q diff --git a/ui/__init__.py b/ui/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ui/streamlit_app.py b/ui/streamlit_app.py new file mode 100644 index 00000000..64897a11 --- /dev/null +++ b/ui/streamlit_app.py @@ -0,0 +1,249 @@ +import os +from html import escape + +import requests +import streamlit as st + + +API_URL = os.getenv("API_URL", "http://127.0.0.1:8000") + + +def _get_db_status() -> tuple[bool, dict, str]: + try: + response = requests.get(f"{API_URL}/db/status", timeout=20) + if response.status_code != 200: + return False, {}, f"DB status request failed ({response.status_code})." + body = response.json() + minimal = { + "built": bool(body.get("built", False)), + "doc_count": int(body.get("doc_count", 0)), + } + return bool(body.get("built", False)), minimal, "" + except requests.RequestException as exc: + return False, {}, f"Could not reach API at {API_URL}: {exc}" + + +def main() -> None: + st.set_page_config(page_title="Customer FAQ Assistant", layout="centered") + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + if "chat_messages" not in st.session_state: + st.session_state.chat_messages = [ + { + "role": "assistant", + "content": "Welcome to the Customer FAQ Assistant. Enter a question to get started.", + "retrieval": None, + "sources": [], + } + ] + if "question_input" not in st.session_state: + st.session_state.question_input = "" + if "pending_example" not in st.session_state: + st.session_state.pending_example = None + if "pending_submission" not in st.session_state: + st.session_state.pending_submission = None + if "clear_question_input" not in st.session_state: + st.session_state.clear_question_input = False + + built, status_payload, status_error = _get_db_status() + + if status_error: + st.error(status_error) + st.stop() + + st.title("Customer FAQ Assistant") + sub_col_1, sub_col_2 = st.columns([4, 2]) + with sub_col_1: + st.caption("Ask questions about Mockridge Bank's fictional products and services.") + with sub_col_2: + status_label = "Ready" if built else "Not Built" + st.caption(f"DB: {status_label} | docs: {status_payload.get('doc_count', 0)}") + + if not built: + st.warning("Retrieval database is not built yet. Press the button below to enable chat.") + if st.button("Build DB"): + try: + response = requests.post(f"{API_URL}/db/build", timeout=120) + if response.status_code != 200: + detail = response.json().get("detail", "Unknown error") + st.error(f"Build failed ({response.status_code}): {detail}") + st.stop() + st.success("Database built successfully. 
You can now ask questions.")
+                st.rerun()
+            except requests.RequestException as exc:
+                st.error(f"Could not build DB at {API_URL}: {exc}")
+        st.stop()
+
+    chat_window = st.container(height=420, border=True)
+    with chat_window:
+        for msg in st.session_state.chat_messages:
+            role = "user" if msg.get("role") == "user" else "assistant"
+            # Render the message as a role-styled chat bubble. NOTE: this markup is
+            # a reconstruction; the class names are assumptions and must match the
+            # CSS injected via st.markdown() at the top of main().
+            st.markdown(
+                f"<div class='chat-row {role}'><div class='chat-bubble {role}'>
{escape(str(msg.get('content', '')))}</div></div>
", + unsafe_allow_html=True, + ) + if role == "assistant": + retrieval = msg.get("retrieval") + sources = msg.get("sources", []) + if sources or retrieval: + with st.expander("Sources"): + if sources: + for source in sources: + st.markdown(f"**{source['title']}** (score: {source['score']})") + st.write(source["snippet"]) + if retrieval: + st.divider() + st.caption("Retrieval Details") + st.write(retrieval) + + controls_disabled = (not built) or (st.session_state.pending_submission is not None) + examples = [ + "What are your checking account monthly fees?", + "How do overdraft fees work?", + "What can I do with the mobile app?", + ] + + if st.session_state.pending_example is not None: + st.session_state.question_input = st.session_state.pending_example + st.session_state.pending_example = None + if st.session_state.clear_question_input: + st.session_state.question_input = "" + st.session_state.clear_question_input = False + + question = st.text_area( + "Message", + placeholder="Enter your question here", + key="question_input", + disabled=controls_disabled, + ) + action_col_1, action_col_2 = st.columns(2) + has_user_messages = any(msg.get("role") == "user" for msg in st.session_state.chat_messages) + with action_col_1: + if has_user_messages and st.button("Clear Chat", key="clear_chat_btn"): + st.session_state.chat_messages = [ + { + "role": "assistant", + "content": "Welcome to the Customer FAQ Assistant. Enter a question to get started.", + "retrieval": None, + "sources": [], + } + ] + st.session_state.clear_question_input = True + st.session_state.pending_submission = None + st.rerun() + with action_col_2: + button_col_1, button_col_2 = st.columns([3, 1]) + with button_col_1: + st.write("") + with button_col_2: + submit_clicked = st.button("Submit", key="submit_btn", disabled=controls_disabled) + + # Processing status placeholder appears between submit buttons and examples + processing_placeholder = st.empty() + + st.caption("Try an example:") + chip_cols = st.columns(3) + for idx, example in enumerate(examples): + with chip_cols[idx]: + if st.button(example, key=f"example_{idx}", disabled=controls_disabled): + st.session_state.pending_example = example + st.rerun() + control_col_1, control_col_2 = st.columns(2) + with control_col_1: + top_k = st.slider("Number of sources to retrieve", min_value=1, max_value=5, value=3, disabled=controls_disabled) + with control_col_2: + generator = st.selectbox( + "Response generator type", + ["mock", "flan-t5"], + index=0, + disabled=controls_disabled, + help="Mock is deterministic and reliable (recommended). flan-t5 is a small instruction-tuned LLM (80M params) for experimental local generation. 
Install via 'run.py setup --with-llm'", + ) + + if submit_clicked: + if not question.strip(): + st.warning("Please enter a question before submitting.") + return + + user_question = question.strip() + st.session_state.chat_messages.append({"role": "user", "content": user_question}) + st.session_state.clear_question_input = True + st.session_state.pending_submission = { + "question": user_question, + "top_k": top_k, + "generator": generator, + } + st.rerun() + + pending_submission = st.session_state.pending_submission + if pending_submission is not None: + try: + with processing_placeholder.container(): + with st.spinner("Processing your question..."): + response = requests.post(f"{API_URL}/ask", json=pending_submission, timeout=90) + if response.status_code != 200: + detail = response.json().get("detail", "Unknown error") + if pending_submission["generator"] == "flan-t5" and "setup --with-llm" in detail: + st.warning("LLM assets not installed. Run `python run.py setup --with-llm` first.") + if response.status_code == 503 and "Database not built" in detail: + st.warning("Database is not built. Use the Build DB button above.") + st.error(f"Request failed ({response.status_code}): {detail}") + st.session_state.pending_submission = None + return + + body = response.json() + st.session_state.chat_messages.append( + { + "role": "assistant", + "content": body["answer"], + "retrieval": body.get("retrieval"), + "sources": body.get("sources", []), + } + ) + st.session_state.pending_submission = None + st.rerun() + except requests.RequestException as exc: + st.session_state.pending_submission = None + st.error(f"Could not reach API at {API_URL}: {exc}") + + +if __name__ == "__main__": + main()
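+
+# Manual run sketch (assumes the API is already listening on API_URL, e.g.
+# started with `python run.py api`):
+#   streamlit run ui/streamlit_app.py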