diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000..79a1e2881 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,196 @@ +name: Documentation + +on: + push: + branches: [main] + paths: + - 'api/**' + - 'docs/**' + - 'crd-ref-docs.config.yaml' + - '.github/workflows/docs.yml' + - 'Makefile' + pull_request: + paths: + - 'api/**' + - 'docs/**' + - 'crd-ref-docs.config.yaml' + - '.github/workflows/docs.yml' + - 'Makefile' + +env: + GO_VERSION: '1.21' + PYTHON_VERSION: '3.11' + +jobs: + build-docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v4 + with: + go-version: ${{ env.GO_VERSION }} + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Cache Go modules + uses: actions/cache@v3 + with: + path: ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go- + + - name: Cache Python dependencies + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install Go dependencies + run: | + make crd-ref-docs gen-crd-api-reference-docs + + - name: Install Python dependencies + run: | + pip install -r docs/requirements.txt + + - name: Generate API documentation + run: | + make api-docs + + - name: Build documentation site + run: | + make docs-build + + - name: Upload documentation artifacts + uses: actions/upload-artifact@v4 + with: + name: documentation-site + path: docs/site/ + retention-days: 30 + + - name: Upload API documentation + uses: actions/upload-artifact@v4 + with: + name: api-documentation + path: docs/content/reference/api.md + retention-days: 30 + + deploy-preview: + if: github.event_name == 'pull_request' + needs: build-docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Download documentation artifacts + uses: actions/download-artifact@v4 + with: + name: documentation-site + path: docs/site/ + + - name: Deploy to Cloudflare Pages (Preview) + uses: cloudflare/pages-action@v1 + with: + apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} + accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} + projectName: llamastack-k8s-operator-docs + directory: docs/site + gitHubToken: ${{ secrets.GITHUB_TOKEN }} + wranglerVersion: '3' + + deploy-production: + if: github.ref == 'refs/heads/main' && github.event_name == 'push' + needs: build-docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Download documentation artifacts + uses: actions/download-artifact@v4 + with: + name: documentation-site + path: docs/site/ + + - name: Deploy to Cloudflare Pages (Production) + uses: cloudflare/pages-action@v1 + with: + apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} + accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} + projectName: llamastack-k8s-operator-docs + directory: docs/site + gitHubToken: ${{ secrets.GITHUB_TOKEN }} + wranglerVersion: '3' + + - name: Update legacy API docs (backward compatibility) + run: | + make api-docs || echo "Legacy API docs target not found, skipping" + + - name: Commit updated API docs + if: github.ref == 'refs/heads/main' + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git add docs/api-overview.md || true + git diff --staged --quiet || git commit -m "docs: update API 
documentation [skip ci]" + git push || true + + validate-docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install dependencies + run: | + pip install -r docs/requirements.txt + pip install linkchecker + + - name: Validate MkDocs configuration + run: | + cd docs && mkdocs build --clean --verbose + + - name: Check for broken links (if built) + run: | + if [ -d "docs/site" ]; then + cd docs/site + python -m http.server 8000 & + sleep 5 + linkchecker http://localhost:8000 --no-warnings --ignore-url=".*\.css$" --ignore-url=".*\.js$" || true + kill %1 2>/dev/null || true + fi + + security-scan: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + scan-type: 'fs' + scan-ref: 'docs/' + format: 'sarif' + output: 'trivy-results.sarif' + + - name: Upload Trivy scan results + uses: github/codeql-action/upload-sarif@v3 + if: always() + with: + sarif_file: 'trivy-results.sarif' diff --git a/.gitignore b/.gitignore index b0f43ee7d..75aecfa52 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,26 @@ catalog-item config/manifests/bases/llama-stack-k8s-operator.clusterserviceversion.yaml .DS_Store + +# Documentation build artifacts +docs/site/ +docs/content/reference/api.md + +# Python virtual environments and cache (for MkDocs) +docs/.venv/ +docs/venv/ +docs/__pycache__/ +*.pyc +*.pyo + +# MkDocs temporary files +docs/.mkdocs_cache/ + +# Wrangler/Cloudflare Pages +.wrangler/ +wrangler.toml.bak + +# Documentation tool binaries (specific to our setup) +bin/crd-ref-docs* +bin/gen-crd-api-reference-docs* +DOCUMENTATION_STRUCTURE.md diff --git a/Makefile b/Makefile index d6014f984..78a59a899 100644 --- a/Makefile +++ b/Makefile @@ -244,6 +244,7 @@ GOLANGCI_LINT ?= $(LOCALBIN)/golangci-lint YQ ?= $(LOCALBIN)/yq YAMLFMT ?= $(LOCALBIN)/yamlfmt CRD_REF_DOCS ?= $(LOCALBIN)/crd-ref-docs +GEN_CRD_API_REF_DOCS ?= $(LOCALBIN)/gen-crd-api-reference-docs ## Tool Versions KUSTOMIZE_VERSION ?= v5.4.3 @@ -253,6 +254,7 @@ GOLANGCI_LINT_VERSION ?= v1.64.4 YQ_VERSION ?= v4.45.3 YAMLFMT_VERSION ?= v0.12.0 CRD_REF_DOCS_VERSION = v0.2.0 +GEN_CRD_API_REF_DOCS_VERSION = v0.3.0 .PHONY: kustomize kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary. @@ -289,6 +291,11 @@ crd-ref-docs: $(CRD_REF_DOCS) ## Download crd-ref-docs locally if necessary. $(CRD_REF_DOCS): $(LOCALBIN) $(call go-install-tool,$(CRD_REF_DOCS),github.com/elastic/crd-ref-docs,$(CRD_REF_DOCS_VERSION)) +.PHONY: gen-crd-api-reference-docs +gen-crd-api-reference-docs: $(GEN_CRD_API_REF_DOCS) ## Download gen-crd-api-reference-docs locally if necessary. 
+$(GEN_CRD_API_REF_DOCS): $(LOCALBIN) + $(call go-install-tool,$(GEN_CRD_API_REF_DOCS),github.com/ahmetb/gen-crd-api-reference-docs,$(GEN_CRD_API_REF_DOCS_VERSION)) + # go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist # $1 - target path with name of binary # $2 - package url which can be installed @@ -338,10 +345,13 @@ OPERATOR_SDK = $(shell which operator-sdk) endif endif +##@ Documentation + .PHONY: api-docs -API_DOCS_PATH = ./docs/api-overview.md -api-docs: crd-ref-docs ## Creates API docs using https://github.com/elastic/crd-ref-docs - mkdir -p docs +API_DOCS_PATH = ./docs/content/reference/api.md +api-docs: crd-ref-docs gen-crd-api-reference-docs ## Generate comprehensive API documentation (HyperShift-style) + mkdir -p docs/content/reference + @echo "Generating API documentation..." $(CRD_REF_DOCS) --source-path ./ --output-path $(API_DOCS_PATH) --renderer markdown --config ./crd-ref-docs.config.yaml @# Combined command to remove .io links, ensure a trailing newline, and collapse multiple blank lines. @sed -i.bak -e '/^$$/N;/^\n$$/D' $(API_DOCS_PATH) @@ -351,6 +361,41 @@ api-docs: crd-ref-docs ## Creates API docs using https://github.com/elastic/crd- sed -i.bak -e '$${/^$$/d}' -e '$${N;/^\n$$/d}' $(API_DOCS_PATH); \ fi rm -f $(API_DOCS_PATH).bak + @echo "API documentation generated at $(API_DOCS_PATH)" + +.PHONY: docs-build +docs-build: api-docs ## Build complete documentation site + @echo "Building documentation site..." + @if [ ! -f docs/requirements.txt ]; then echo "Error: docs/requirements.txt not found"; exit 1; fi + @if command -v pip >/dev/null 2>&1; then \ + pip install -r docs/requirements.txt; \ + else \ + echo "Warning: pip not found, assuming dependencies are installed"; \ + fi + cd docs && mkdocs build + @echo "Documentation site built in docs/site/" + +.PHONY: docs-serve +docs-serve: docs-build ## Serve documentation locally (like HyperShift's serve-containerized) + @echo "Starting documentation server at http://localhost:8000" + cd docs && mkdocs serve --dev-addr 0.0.0.0:8000 + +.PHONY: docs-clean +docs-clean: ## Clean documentation build artifacts + rm -rf docs/site/ + rm -f docs/content/reference/api.md + +# Legacy target for backward compatibility +.PHONY: api-docs-legacy +API_DOCS_LEGACY_PATH = ./docs/api-overview.md +api-docs-legacy: crd-ref-docs ## Creates legacy API docs (backward compatibility) + mkdir -p docs + $(CRD_REF_DOCS) --source-path ./ --output-path $(API_DOCS_LEGACY_PATH) --renderer markdown --config ./crd-ref-docs.config.yaml + @sed -i.bak -e '/\.io\/[^v][^1].*)/d' -e '/^$$/N;/^\n$$/D' $(API_DOCS_LEGACY_PATH) + @if sed --version >/dev/null 2>&1; then \ + sed -i.bak -e '$${/^$$/d}' -e '$${N;/^\n$$/d}' $(API_DOCS_LEGACY_PATH); \ + fi + rm -f $(API_DOCS_LEGACY_PATH).bak .PHONY: bundle bundle: manifests kustomize operator-sdk ## Generate bundle manifests and metadata, then validate generated files. diff --git a/crd-ref-docs.config.yaml b/crd-ref-docs.config.yaml index 970e755ad..632eb0db8 100644 --- a/crd-ref-docs.config.yaml +++ b/crd-ref-docs.config.yaml @@ -3,6 +3,32 @@ render: # RE2 regular expressions describing types that should be excluded from the generated documentation. ignoreTypes: - "(LlamaStackDistribution)List$" - - # Version of Kubernetes to use when generating links to Kubernetes API documentation. 
+ - ".*Status$" + # Add custom type mappings + typeDisplayNamePrefixOverrides: + "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1": "" + + # Enhanced rendering options kubernetesVersion: 1.31 + markdownDisabled: false + frontMatter: + title: "API Reference" + description: "Complete API reference for LlamaStack Kubernetes Operator" + weight: 100 + + # Custom sections + sections: + - title: "Overview" + content: | + This document contains the API reference for the LlamaStack Kubernetes Operator. + The operator manages LlamaStack distributions in Kubernetes clusters. + + ## Quick Links + + - [LlamaStackDistribution](#llamastackdistribution) - Main resource for deploying LlamaStack + - [Getting Started Guide](../getting-started/quick-start/) - Quick start tutorial + - [Examples](../examples/) - Real-world configuration examples + + - title: "Resource Types" + content: | + The LlamaStack Operator defines the following Kubernetes custom resources: diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..a91f2bcd7 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,321 @@ +# LlamaStack Operator Documentation + +This directory contains the documentation for the LlamaStack Kubernetes Operator, built using [MkDocs](https://www.mkdocs.org/) with the [Material theme](https://squidfunk.github.io/mkdocs-material/). + +## πŸ—οΈ Architecture + +The documentation follows HyperShift's approach with enhanced features: + +```mermaid +graph TD + A[API Types] --> B[Documentation Generator] + B --> C[MkDocs Build] + C --> D[Static Site] + D --> E[Cloudflare Pages] + + F[GitHub Actions] --> G[Auto Build] + G --> H[Deploy Preview] + G --> I[Deploy Production] + + J[Pull Request] --> K[Preview Deployment] + K --> L[Review & Test] + L --> M[Merge & Deploy] +``` + +## πŸ“ Structure + +``` +docs/ +β”œβ”€β”€ mkdocs.yml # MkDocs configuration +β”œβ”€β”€ requirements.txt # Python dependencies +β”œβ”€β”€ README.md # This file +β”œβ”€β”€ api-doc-gen/ # API documentation generation config +β”‚ └── config.json # HyperShift-style API doc config +└── content/ # Documentation content + β”œβ”€β”€ index.md # Homepage + β”œβ”€β”€ getting-started/ # Installation and setup guides + β”‚ β”œβ”€β”€ installation.md + β”‚ β”œβ”€β”€ quick-start.md + β”‚ └── configuration.md + β”œβ”€β”€ how-to/ # Task-oriented guides + β”‚ β”œβ”€β”€ deploy-llamastack.md + β”‚ β”œβ”€β”€ configure-storage.md + β”‚ β”œβ”€β”€ scaling.md + β”‚ β”œβ”€β”€ monitoring.md + β”‚ └── troubleshooting.md + β”œβ”€β”€ reference/ # API and configuration reference + β”‚ β”œβ”€β”€ api.md # Generated API documentation + β”‚ β”œβ”€β”€ configuration.md + β”‚ └── cli.md + β”œβ”€β”€ examples/ # Real-world examples + β”‚ β”œβ”€β”€ basic-deployment.md + β”‚ β”œβ”€β”€ production-setup.md + β”‚ └── custom-images.md + β”œβ”€β”€ contributing/ # Development guides + β”‚ β”œβ”€β”€ development.md + β”‚ β”œβ”€β”€ testing.md + β”‚ └── documentation.md + β”œβ”€β”€ stylesheets/ # Custom CSS + β”‚ └── extra.css + └── javascripts/ # Custom JavaScript + └── extra.js +``` + +## πŸš€ Quick Start + +### Prerequisites + +- **Go 1.24+** - For API documentation generation tools +- **Python 3.8+** - For MkDocs and dependencies +- **pip3** - Python package manager + +### Local Development + +1. **Install dependencies**: + ```bash + pip install -r requirements.txt + ``` + +2. **Generate API documentation**: + ```bash + make api-docs + ``` + +3. **Build and serve locally**: + ```bash + make docs-serve + ``` + +4. 
**Or use the build script**: + ```bash + ./scripts/build-docs.sh + ``` + +The documentation will be available at `http://localhost:8000`. + +## πŸ› οΈ Development + +### Adding New Content + +1. **Create new markdown files** in the appropriate `content/` subdirectory +2. **Update navigation** in `mkdocs.yml` +3. **Test locally** with `make docs-serve` +4. **Submit a pull request** + +### API Documentation + +The API documentation is automatically generated from Go type definitions: + +- **Source**: `api/v1alpha1/llamastackdistribution_types.go` +- **Config**: `crd-ref-docs.config.yaml` +- **Output**: `docs/content/reference/api.md` + +To regenerate API docs: +```bash +make api-docs +``` + +### Styling and Theming + +- **CSS**: Add custom styles to `content/stylesheets/extra.css` +- **JavaScript**: Add functionality to `content/javascripts/extra.js` +- **Theme**: Configure in `mkdocs.yml` under the `theme` section + +### Interactive Features + +The documentation includes several interactive features: + +- **Code copy buttons** - Automatic copy-to-clipboard for code blocks +- **YAML validator** - Validate LlamaStackDistribution configurations +- **Search enhancements** - Improved search with suggestions +- **Navigation aids** - Breadcrumbs, edit links, and page navigation + +## πŸ”§ Configuration + +### MkDocs Configuration + +The main configuration is in [`mkdocs.yml`](mkdocs.yml): + +- **Site metadata** - Title, description, URLs +- **Theme configuration** - Material theme with custom colors +- **Navigation structure** - Page organization +- **Plugins** - Search, Mermaid diagrams +- **Markdown extensions** - Code highlighting, admonitions, etc. + +### API Documentation Configuration + +API documentation generation is configured in: + +- **[`crd-ref-docs.config.yaml`](../crd-ref-docs.config.yaml)** - Main configuration +- **[`api-doc-gen/config.json`](api-doc-gen/config.json)** - HyperShift-style configuration + +### Build Configuration + +- **[`requirements.txt`](requirements.txt)** - Python dependencies +- **[`../Makefile`](../Makefile)** - Build targets and tool installation +- **[`../scripts/build-docs.sh`](../scripts/build-docs.sh)** - Comprehensive build script + +## πŸš€ Deployment + +### Cloudflare Pages + +The documentation is automatically deployed to Cloudflare Pages: + +- **Production**: `https://llamastack-k8s-operator.pages.dev` +- **Preview**: Automatic preview deployments for pull requests +- **Configuration**: [`../wrangler.toml`](../wrangler.toml) + +### GitHub Actions + +Automated builds and deployments are handled by GitHub Actions: + +- **Workflow**: [`.github/workflows/docs.yml`](../.github/workflows/docs.yml) +- **Triggers**: Changes to `api/`, `docs/`, or configuration files +- **Features**: Build validation, security scanning, preview deployments + +### Manual Deployment + +For manual deployments: + +```bash +# Build the documentation +make docs-build + +# Deploy to Cloudflare Pages (requires wrangler CLI) +wrangler pages deploy docs/site --project-name llamastack-k8s-operator-docs +``` + +## πŸ“Š Analytics and Monitoring + +### Performance Monitoring + +- **Build time tracking** - Monitor documentation build performance +- **Page load metrics** - Track user experience +- **Search analytics** - Understand user search patterns + +### Content Analytics + +- **Popular pages** - Identify most-visited content +- **User flow** - Understand navigation patterns +- **Feedback collection** - Gather user feedback + +## πŸ” Quality Assurance + +### Automated Checks + +- 
**Link validation** - Check for broken internal and external links +- **Markup validation** - Ensure valid HTML output +- **Accessibility testing** - WCAG 2.1 AA compliance +- **Performance testing** - Page load speed optimization + +### Manual Review Process + +1. **Content review** - Technical accuracy and clarity +2. **Design review** - Visual consistency and usability +3. **Accessibility review** - Screen reader compatibility +4. **Mobile testing** - Responsive design validation + +## πŸ›‘οΈ Security + +### Content Security + +- **Input validation** - Sanitize user-generated content +- **XSS protection** - Prevent cross-site scripting +- **CSRF protection** - Secure form submissions + +### Deployment Security + +- **HTTPS enforcement** - All traffic encrypted +- **Security headers** - Comprehensive security header configuration +- **Dependency scanning** - Regular security updates + +## 🀝 Contributing + +### Documentation Guidelines + +1. **Write for your audience** - Consider user experience level +2. **Use clear headings** - Organize content hierarchically +3. **Include examples** - Provide practical, working examples +4. **Test instructions** - Verify all commands and procedures +5. **Update navigation** - Ensure new content is discoverable + +### Style Guide + +- **Tone**: Professional but approachable +- **Voice**: Active voice preferred +- **Formatting**: Consistent use of markdown features +- **Code examples**: Complete, runnable examples +- **Screenshots**: High-quality, up-to-date images + +### Review Process + +1. **Create feature branch** - `docs/feature-name` +2. **Make changes** - Follow style guide +3. **Test locally** - Verify build and functionality +4. **Submit pull request** - Include preview link +5. **Address feedback** - Respond to review comments +6. 
**Merge and deploy** - Automatic deployment on merge + +## πŸ“š Resources + +### Documentation Tools + +- **[MkDocs](https://www.mkdocs.org/)** - Static site generator +- **[Material for MkDocs](https://squidfunk.github.io/mkdocs-material/)** - Theme +- **[crd-ref-docs](https://github.com/elastic/crd-ref-docs)** - API documentation generator +- **[Mermaid](https://mermaid-js.github.io/)** - Diagram generation + +### Deployment Platforms + +- **[Cloudflare Pages](https://pages.cloudflare.com/)** - Static site hosting +- **[GitHub Actions](https://github.com/features/actions)** - CI/CD automation +- **[Wrangler](https://developers.cloudflare.com/workers/wrangler/)** - Cloudflare CLI + +### Best Practices + +- **[DiΓ‘taxis](https://diataxis.fr/)** - Documentation framework +- **[Google Developer Documentation Style Guide](https://developers.google.com/style)** +- **[Write the Docs](https://www.writethedocs.org/)** - Documentation community + +## πŸ†˜ Troubleshooting + +### Common Issues + +**Build failures:** +```bash +# Check dependencies +pip install -r requirements.txt +make crd-ref-docs + +# Clean and rebuild +make docs-clean +make docs-build +``` + +**API documentation not updating:** +```bash +# Regenerate API docs +make api-docs + +# Check source files +git status api/v1alpha1/ +``` + +**Local server issues:** +```bash +# Check port availability +lsof -i :8000 + +# Use different port +cd docs && mkdocs serve --dev-addr 0.0.0.0:8001 +``` + +### Getting Help + +- **GitHub Issues**: [Report bugs and request features](https://github.com/llamastack/llama-stack-k8s-operator/issues) +- **Discussions**: [Community discussions](https://github.com/llamastack/llama-stack-k8s-operator/discussions) +- **Documentation**: [Contributing guide](content/contributing/documentation.md) + +## πŸ“„ License + +This documentation is licensed under the Apache License 2.0. See the [LICENSE](../LICENSE) file for details. diff --git a/docs/api-doc-gen/config.json b/docs/api-doc-gen/config.json new file mode 100644 index 000000000..3a144f96a --- /dev/null +++ b/docs/api-doc-gen/config.json @@ -0,0 +1,30 @@ +{ + "hideMemberFields": [ + "TypeMeta" + ], + "hideTypePatterns": [ + "(LlamaStackDistribution)List$" + ], + "externalPackages": [ + { + "typeMatchPrefix": "k8s.io/api/", + "docsURLTemplate": "https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#{{lower .TypeIdentifier}}-{{arrIndex .PackageSegments -1}}-{{arrIndex .PackageSegments -2}}" + }, + { + "typeMatchPrefix": "k8s.io/apimachinery/pkg/apis/meta/v1", + "docsURLTemplate": "https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#{{lower .TypeIdentifier}}-v1-meta" + }, + { + "typeMatchPrefix": "k8s.io/api/core/v1", + "docsURLTemplate": "https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#{{lower .TypeIdentifier}}-v1-core" + }, + { + "typeMatchPrefix": "k8s.io/apimachinery/pkg/api/resource", + "docsURLTemplate": "https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#quantity-resource-core" + } + ], + "typeDisplayNamePrefixOverrides": { + "github.com/llamastack/llama-stack-k8s-operator/api/v1alpha1": "" + }, + "markdownDisabled": false +} diff --git a/docs/content/contributing/development.md b/docs/content/contributing/development.md new file mode 100644 index 000000000..aa2065d79 --- /dev/null +++ b/docs/content/contributing/development.md @@ -0,0 +1,56 @@ +# Development Guide + +Guide for contributing to the LlamaStack Kubernetes Operator. 
+ +## Development Setup + +### Prerequisites + +- Go 1.24+ +- Docker +- Kubernetes cluster (kind/minikube for local development) +- kubectl +- make + +### Local Development + +```bash +# Clone the repository +git clone https://github.com/llamastack/llama-stack-k8s-operator.git +cd llama-stack-k8s-operator + +# Install dependencies +make deps + +# Run tests +make test + +# Build operator +make build + +# Run locally +make run +``` + +## Contributing + +### Code Style + +- Follow Go conventions +- Use `gofmt` for formatting +- Add tests for new features +- Update documentation + +### Pull Request Process + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. Add tests +5. Update documentation +6. Submit a pull request + +## Next Steps + +- [Testing Guide](testing.md) +- [Documentation Guide](documentation.md) diff --git a/docs/content/contributing/documentation.md b/docs/content/contributing/documentation.md new file mode 100644 index 000000000..2492c1ddb --- /dev/null +++ b/docs/content/contributing/documentation.md @@ -0,0 +1,75 @@ +# Documentation Guide + +Guide for contributing to the LlamaStack Kubernetes Operator documentation. + +## Documentation Structure + +The documentation is built with MkDocs and follows this structure: + +``` +docs/ +β”œβ”€β”€ content/ +β”‚ β”œβ”€β”€ index.md +β”‚ β”œβ”€β”€ getting-started/ +β”‚ β”œβ”€β”€ how-to/ +β”‚ β”œβ”€β”€ reference/ +β”‚ β”œβ”€β”€ examples/ +β”‚ └── contributing/ +└── mkdocs.yml +``` + +## Writing Documentation + +### Markdown Guidelines + +- Use clear, concise language +- Include code examples +- Add diagrams where helpful +- Follow the existing style + +### Code Examples + +```yaml +# Always include complete, working examples +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: example +spec: + image: llamastack/llamastack:latest +``` + +## Building Documentation + +### Local Development + +```bash +# Install dependencies +pip install -r docs/requirements.txt + +# Serve locally +make docs-serve + +# Build static site +make docs-build +``` + +### API Documentation + +API documentation is auto-generated from Go types: + +```bash +# Generate API docs +make api-docs +``` + +## Contributing + +1. Edit markdown files in `docs/content/` +2. Test locally with `make docs-serve` +3. Submit a pull request + +## Next Steps + +- [Development Guide](development.md) +- [Testing Guide](testing.md) diff --git a/docs/content/contributing/testing.md b/docs/content/contributing/testing.md new file mode 100644 index 000000000..8be17fcb2 --- /dev/null +++ b/docs/content/contributing/testing.md @@ -0,0 +1,51 @@ +# Testing Guide + +Testing guidelines for the LlamaStack Kubernetes Operator. + +## Test Types + +### Unit Tests + +```bash +# Run unit tests +make test + +# Run with coverage +make test-coverage + +# Run specific package +go test ./controllers/... 
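+
+# Run a single test by name (e.g. the controller test shown below)
+go test ./controllers/... -run TestLlamaStackDistributionController -v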
+``` + +### Integration Tests + +```bash +# Run integration tests +make test-integration + +# Run e2e tests +make test-e2e +``` + +## Writing Tests + +### Controller Tests + +```go +func TestLlamaStackDistributionController(t *testing.T) { + // Test implementation +} +``` + +### E2E Tests + +```go +func TestE2EDeployment(t *testing.T) { + // E2E test implementation +} +``` + +## Next Steps + +- [Development Guide](development.md) +- [Documentation Guide](documentation.md) diff --git a/docs/content/distributions/bedrock.md b/docs/content/distributions/bedrock.md new file mode 100644 index 000000000..ee7d7d429 --- /dev/null +++ b/docs/content/distributions/bedrock.md @@ -0,0 +1,436 @@ +# AWS Bedrock Distribution + +!!! warning "Distribution Availability" + The Bedrock distribution container image may not be currently maintained or available. + Please verify the image exists at `docker.io/llamastack/distribution-bedrock:latest` before using this distribution. + For production use, consider using the `ollama` or `vllm` distributions which are actively maintained. + +The **Bedrock** distribution enables seamless integration with Amazon Bedrock, AWS's fully managed service for foundation models. This distribution allows you to leverage AWS Bedrock's powerful models through the LlamaStack Kubernetes Operator. + +## Overview + +Amazon Bedrock provides access to high-performing foundation models from leading AI companies through a single API. The Bedrock distribution: + +- **Connects to AWS Bedrock** for model inference +- **Manages AWS credentials** securely +- **Provides unified API** through LlamaStack +- **Supports multiple Bedrock models** (Claude, Llama, Titan, etc.) + +## Distribution Details + +| Property | Value | +|----------|-------| +| **Distribution Name** | `bedrock` | +| **Image** | `docker.io/llamastack/distribution-bedrock:latest` | +| **Use Case** | AWS Bedrock model integration | +| **Requirements** | AWS credentials and Bedrock access | +| **Recommended For** | AWS users, enterprise deployments | + +## Prerequisites + +### 1. AWS Account Setup + +- AWS account with Bedrock access +- IAM user/role with Bedrock permissions +- Bedrock model access enabled in your AWS region + +### 2. Required AWS Permissions + +Your AWS credentials need the following permissions: + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "bedrock:InvokeModel", + "bedrock:InvokeModelWithResponseStream", + "bedrock:ListFoundationModels", + "bedrock:GetFoundationModel" + ], + "Resource": "*" + } + ] +} +``` + +### 3. Enable Bedrock Models + +Enable the models you want to use in the AWS Bedrock console: +- Anthropic Claude models +- Meta Llama models +- Amazon Titan models +- Cohere Command models + +## Quick Start + +### 1. Create AWS Credentials Secret + +```bash +kubectl create secret generic aws-credentials \ + --from-literal=AWS_ACCESS_KEY_ID=your-access-key \ + --from-literal=AWS_SECRET_ACCESS_KEY=your-secret-key \ + --from-literal=AWS_DEFAULT_REGION=us-east-1 +``` + +### 2. 
Create Bedrock Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-bedrock-llamastack + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "bedrock" + containerSpec: + port: 8321 + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "1" + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-credentials + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-credentials + key: AWS_SECRET_ACCESS_KEY + - name: AWS_DEFAULT_REGION + valueFrom: + secretKeyRef: + name: aws-credentials + key: AWS_DEFAULT_REGION + - name: BEDROCK_MODEL_ID + value: "anthropic.claude-3-sonnet-20240229-v1:0" + storage: + size: "10Gi" +``` + +### 3. Deploy the Distribution + +```bash +kubectl apply -f bedrock-distribution.yaml +``` + +### 4. Verify Deployment + +```bash +# Check the distribution status +kubectl get llamastackdistribution my-bedrock-llamastack + +# Check the pods +kubectl get pods -l app=llama-stack + +# Check logs for AWS connection +kubectl logs -l app=llama-stack +``` + +## Configuration Options + +### Supported Bedrock Models + +Configure different Bedrock models using the `BEDROCK_MODEL_ID` environment variable: + +#### Anthropic Claude Models +```yaml +env: + - name: BEDROCK_MODEL_ID + value: "anthropic.claude-3-sonnet-20240229-v1:0" # Claude 3 Sonnet + # value: "anthropic.claude-3-haiku-20240307-v1:0" # Claude 3 Haiku + # value: "anthropic.claude-v2:1" # Claude 2.1 +``` + +#### Meta Llama Models +```yaml +env: + - name: BEDROCK_MODEL_ID + value: "meta.llama2-70b-chat-v1" # Llama 2 70B Chat + # value: "meta.llama2-13b-chat-v1" # Llama 2 13B Chat +``` + +#### Amazon Titan Models +```yaml +env: + - name: BEDROCK_MODEL_ID + value: "amazon.titan-text-express-v1" # Titan Text Express + # value: "amazon.titan-text-lite-v1" # Titan Text Lite +``` + +### AWS Authentication Methods + +#### Method 1: Access Keys (Secrets) +```yaml +env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-credentials + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-credentials + key: AWS_SECRET_ACCESS_KEY + - name: AWS_DEFAULT_REGION + value: "us-east-1" +``` + +#### Method 2: IAM Roles for Service Accounts (IRSA) +```yaml +spec: + server: + podOverrides: + serviceAccountName: bedrock-service-account + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::123456789012:role/BedrockRole +``` + +#### Method 3: Instance Profile (EKS Nodes) +```yaml +# No additional configuration needed if EKS nodes have Bedrock permissions +env: + - name: AWS_DEFAULT_REGION + value: "us-east-1" +``` + +### Environment Variables + +```yaml +env: + - name: BEDROCK_MODEL_ID + value: "anthropic.claude-3-sonnet-20240229-v1:0" + - name: AWS_DEFAULT_REGION + value: "us-east-1" + - name: BEDROCK_MAX_TOKENS + value: "4096" + - name: BEDROCK_TEMPERATURE + value: "0.7" + - name: LOG_LEVEL + value: "INFO" +``` + +## Advanced Configuration + +### Multi-Model Setup + +Deploy multiple Bedrock distributions for different models: + +```yaml +# Claude 3 Sonnet Distribution +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: bedrock-claude-sonnet +spec: + server: + distribution: + name: "bedrock" + containerSpec: + env: + - name: BEDROCK_MODEL_ID + value: "anthropic.claude-3-sonnet-20240229-v1:0" +--- +# Llama 2 70B Distribution +apiVersion: llamastack.io/v1alpha1 +kind: 
LlamaStackDistribution +metadata: + name: bedrock-llama2-70b +spec: + server: + distribution: + name: "bedrock" + containerSpec: + env: + - name: BEDROCK_MODEL_ID + value: "meta.llama2-70b-chat-v1" +``` + +### Production Configuration + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: production-bedrock + namespace: production +spec: + replicas: 3 + server: + distribution: + name: "bedrock" + containerSpec: + resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "4Gi" + cpu: "2" + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-credentials + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-credentials + key: AWS_SECRET_ACCESS_KEY + - name: AWS_DEFAULT_REGION + value: "us-east-1" + - name: BEDROCK_MODEL_ID + value: "anthropic.claude-3-sonnet-20240229-v1:0" + - name: LOG_LEVEL + value: "WARNING" + - name: ENABLE_TELEMETRY + value: "true" + storage: + size: "20Gi" +``` + +## Use Cases + +### 1. Enterprise AI Applications + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: enterprise-bedrock + namespace: enterprise +spec: + replicas: 5 + server: + distribution: + name: "bedrock" + containerSpec: + resources: + requests: + memory: "4Gi" + cpu: "2" + limits: + memory: "8Gi" + cpu: "4" + env: + - name: BEDROCK_MODEL_ID + value: "anthropic.claude-3-sonnet-20240229-v1:0" + - name: AWS_DEFAULT_REGION + value: "us-east-1" +``` + +### 2. Development and Testing + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: dev-bedrock + namespace: development +spec: + replicas: 1 + server: + distribution: + name: "bedrock" + containerSpec: + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + env: + - name: BEDROCK_MODEL_ID + value: "anthropic.claude-3-haiku-20240307-v1:0" # Faster, cheaper model + - name: LOG_LEVEL + value: "DEBUG" +``` + +## Monitoring and Troubleshooting + +### Health Checks + +```bash +# Check distribution status +kubectl get llamastackdistribution + +# Check pod logs for AWS connectivity +kubectl logs -l app=llama-stack + +# Test AWS credentials +kubectl exec -it -- aws bedrock list-foundation-models +``` + +### Common Issues + +1. **AWS Credentials Invalid** + ```bash + # Verify credentials in secret + kubectl get secret aws-credentials -o yaml + + # Test credentials + kubectl exec -it -- aws sts get-caller-identity + ``` + +2. **Model Access Denied** + - Enable model access in AWS Bedrock console + - Verify IAM permissions include `bedrock:InvokeModel` + - Check if model is available in your AWS region + +3. 
**Region Issues** + - Ensure Bedrock is available in your region + - Verify `AWS_DEFAULT_REGION` matches model availability + +### Cost Monitoring + +Monitor AWS Bedrock costs: +- Use AWS Cost Explorer to track Bedrock usage +- Set up billing alerts for unexpected usage +- Consider using cheaper models for development + +## Best Practices + +### Security +- Use IAM roles instead of access keys when possible +- Store credentials in Kubernetes Secrets +- Implement least-privilege IAM policies +- Enable AWS CloudTrail for audit logging + +### Performance +- Choose appropriate models for your use case +- Use Haiku for speed, Sonnet for balance, Opus for quality +- Scale replicas based on request volume +- Monitor response times and adjust accordingly + +### Cost Optimization +- Use smaller models for development/testing +- Implement request caching where appropriate +- Monitor token usage and optimize prompts +- Set up cost alerts and budgets + +## Next Steps + +- [Configure Storage](../how-to-guides/storage.md) +- [Set up Monitoring](../how-to-guides/monitoring.md) +- [Scaling Guide](../how-to-guides/scaling.md) +- [Security Best Practices](../how-to-guides/security.md) + +## API Reference + +For complete API documentation, see: +- [API Reference](../reference/api.md) +- [Configuration Reference](../reference/configuration.md) diff --git a/docs/content/distributions/bring-your-own.md b/docs/content/distributions/bring-your-own.md new file mode 100644 index 000000000..4667c7a1b --- /dev/null +++ b/docs/content/distributions/bring-your-own.md @@ -0,0 +1,562 @@ +# Bring Your Own (BYO) Distributions + +The LlamaStack Kubernetes operator supports both pre-built distributions and custom "Bring Your Own" (BYO) distributions. This guide shows you how to build, customize, and deploy your own LlamaStack distributions. + +## Overview + +### Supported vs BYO Distributions + +| Type | Description | Use Case | Configuration | +|------|-------------|----------|---------------| +| **Supported** | Pre-built distributions maintained by the LlamaStack team | Quick deployment, standard configurations | Use `distribution.name` field | +| **BYO** | Custom distributions you build and maintain | Custom providers, specialized configurations | Use `distribution.image` field | + +### Why Build Custom Distributions? + +- **Custom Providers**: Integrate with proprietary or specialized inference engines +- **Specific Configurations**: Tailor the stack for your exact requirements +- **External Dependencies**: Include additional libraries or tools +- **Security Requirements**: Control the entire build process and dependencies +- **Performance Optimization**: Optimize for your specific hardware or use case + +## Building LlamaStack Distributions + +### Prerequisites + +1. **Install LlamaStack CLI**: + ```bash + pip install llama-stack + ``` + +2. **Docker or Podman** (for container builds): + ```bash + # Verify Docker is running + docker --version + ``` + +3. **Conda** (for conda builds): + ```bash + # Verify Conda is available + conda --version + ``` + +### Quick Start: Building from Templates + +#### 1. List Available Templates + +```bash +llama stack build --list-templates +``` + +This shows available templates like: +- `ollama` - Ollama-based inference +- `vllm-gpu` - vLLM with GPU support +- `meta-reference-gpu` - Meta's reference implementation +- `bedrock` - AWS Bedrock integration +- `fireworks` - Fireworks AI integration + +#### 2. 
Build from Template + +```bash +# Build a container image from Ollama template +llama stack build --template ollama --image-type container + +# Build a conda environment from vLLM template +llama stack build --template vllm-gpu --image-type conda + +# Build with custom name +llama stack build --template ollama --image-type container --image-name my-custom-ollama +``` + +#### 3. Interactive Build + +```bash +llama stack build +``` + +This launches an interactive wizard: + +``` +> Enter a name for your Llama Stack (e.g. my-local-stack): my-custom-stack +> Enter the image type you want your Llama Stack to be built as (container or conda or venv): container + +Llama Stack is composed of several APIs working together. Let's select +the provider types (implementations) you want to use for these APIs. + +> Enter provider for API inference: inline::meta-reference +> Enter provider for API safety: inline::llama-guard +> Enter provider for API agents: inline::meta-reference +> Enter provider for API memory: inline::faiss +> Enter provider for API datasetio: inline::meta-reference +> Enter provider for API scoring: inline::meta-reference +> Enter provider for API eval: inline::meta-reference +> Enter provider for API telemetry: inline::meta-reference + +> (Optional) Enter a short description for your Llama Stack: My custom distribution +``` + +### Advanced: Custom Configuration Files + +#### 1. Create a Custom Build Configuration + +Create `my-custom-build.yaml`: + +```yaml +name: my-custom-stack +distribution_spec: + description: Custom distribution with external Ollama + providers: + inference: remote::ollama + memory: inline::faiss + safety: inline::llama-guard + agents: inline::meta-reference + telemetry: inline::meta-reference + datasetio: inline::meta-reference + scoring: inline::meta-reference + eval: inline::meta-reference +image_name: my-custom-stack +image_type: container + +# Optional: External providers directory +external_providers_dir: ~/.llama/providers.d +``` + +#### 2. Build from Custom Configuration + +```bash +llama stack build --config my-custom-build.yaml +``` + +### Image Types + +#### Container Images + +Best for production deployments and Kubernetes: + +```bash +llama stack build --template ollama --image-type container +``` + +**Advantages**: +- Consistent across environments +- Easy to deploy in Kubernetes +- Isolated dependencies +- Reproducible builds + +#### Conda Environments + +Good for development and local testing: + +```bash +llama stack build --template ollama --image-type conda +``` + +**Advantages**: +- Fast iteration during development +- Easy dependency management +- Good for experimentation + +#### Virtual Environments + +Lightweight option for Python-only setups: + +```bash +llama stack build --template ollama --image-type venv +``` + +## Custom Providers + +### Adding External Providers + +#### 1. Create Provider Configuration + +Create `~/.llama/providers.d/custom-ollama.yaml`: + +```yaml +adapter: + adapter_type: custom_ollama + pip_packages: + - ollama + - aiohttp + - llama-stack-provider-ollama + config_class: llama_stack_ollama_provider.config.OllamaImplConfig + module: llama_stack_ollama_provider +api_dependencies: [] +optional_api_dependencies: [] +``` + +#### 2. 
Reference in Build Configuration + +```yaml +name: custom-external-stack +distribution_spec: + description: Custom distro with external providers + providers: + inference: remote::custom_ollama + memory: inline::faiss + safety: inline::llama-guard + agents: inline::meta-reference + telemetry: inline::meta-reference +image_type: container +image_name: custom-external-stack +external_providers_dir: ~/.llama/providers.d +``` + +## Using Custom Distributions with Kubernetes + +### 1. Build and Push Container Image + +```bash +# Build the distribution +llama stack build --template ollama --image-type container --image-name my-ollama-dist + +# Tag for your registry +docker tag distribution-my-ollama-dist:dev my-registry.com/my-ollama-dist:v1.0.0 + +# Push to registry +docker push my-registry.com/my-ollama-dist:v1.0.0 +``` + +### 2. Deploy with Kubernetes Operator + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-custom-distribution + namespace: default +spec: + replicas: 1 + server: + distribution: + image: "my-registry.com/my-ollama-dist:v1.0.0" # Custom image + containerSpec: + port: 8321 + resources: + requests: + memory: "8Gi" + cpu: "4" + limits: + memory: "16Gi" + cpu: "8" + env: + - name: INFERENCE_MODEL + value: "llama3.2:1b" + - name: OLLAMA_URL + value: "http://ollama-server:11434" + storage: + size: "20Gi" +``` + +### 3. Verify Deployment + +```bash +kubectl get llamastackdistribution my-custom-distribution +kubectl get pods -l app=llama-stack +kubectl logs -l app=llama-stack +``` + +## Examples + +### Example 1: Custom Ollama Distribution + +#### Build Configuration (`custom-ollama-build.yaml`) + +```yaml +name: custom-ollama +distribution_spec: + description: Custom Ollama distribution with additional tools + providers: + inference: remote::ollama + memory: inline::faiss + safety: inline::llama-guard + agents: inline::meta-reference + telemetry: inline::meta-reference +image_name: custom-ollama +image_type: container +``` + +#### Build and Deploy + +```bash +# Build the distribution +llama stack build --config custom-ollama-build.yaml + +# Tag and push +docker tag distribution-custom-ollama:dev my-registry.com/custom-ollama:latest +docker push my-registry.com/custom-ollama:latest +``` + +#### Kubernetes Deployment + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: custom-ollama-dist +spec: + replicas: 2 + server: + distribution: + image: "my-registry.com/custom-ollama:latest" + containerSpec: + resources: + requests: + memory: "8Gi" + cpu: "4" + limits: + memory: "16Gi" + cpu: "8" + env: + - name: INFERENCE_MODEL + value: "llama3.2:3b" + - name: OLLAMA_URL + value: "http://ollama-service:11434" +``` + +### Example 2: Custom vLLM Distribution + +#### Build Configuration (`custom-vllm-build.yaml`) + +```yaml +name: custom-vllm +distribution_spec: + description: Custom vLLM distribution with GPU optimization + providers: + inference: inline::vllm + memory: inline::faiss + safety: inline::llama-guard + agents: inline::meta-reference + telemetry: inline::meta-reference +image_name: custom-vllm +image_type: container +``` + +#### Enhanced Dockerfile + +Create a custom Dockerfile to extend the base distribution: + +```dockerfile +FROM distribution-custom-vllm:dev + +# Install additional dependencies +RUN pip install custom-optimization-library + +# Add custom configuration +COPY custom-vllm-config.json /app/config.json + +# Set environment variables +ENV VLLM_OPTIMIZATION_LEVEL=high +ENV 
CUSTOM_GPU_SETTINGS=enabled + +# Expose port +EXPOSE 8321 +``` + +#### Build and Deploy + +```bash +# Build the LlamaStack distribution +llama stack build --config custom-vllm-build.yaml + +# Build enhanced Docker image +docker build -t my-registry.com/enhanced-vllm:latest . + +# Push to registry +docker push my-registry.com/enhanced-vllm:latest +``` + +#### Kubernetes Deployment + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: enhanced-vllm-dist +spec: + replicas: 1 + server: + distribution: + image: "my-registry.com/enhanced-vllm:latest" + containerSpec: + resources: + requests: + nvidia.com/gpu: "2" + memory: "32Gi" + cpu: "8" + limits: + nvidia.com/gpu: "2" + memory: "64Gi" + cpu: "16" + env: + - name: INFERENCE_MODEL + value: "meta-llama/Llama-2-13b-chat-hf" + - name: VLLM_GPU_MEMORY_UTILIZATION + value: "0.9" + - name: VLLM_TENSOR_PARALLEL_SIZE + value: "2" +``` + +### Example 3: Multi-Provider Distribution + +#### Build Configuration (`multi-provider-build.yaml`) + +```yaml +name: multi-provider +distribution_spec: + description: Distribution with multiple inference providers + providers: + inference: + - remote::ollama + - remote::vllm + memory: inline::faiss + safety: inline::llama-guard + agents: inline::meta-reference + telemetry: inline::meta-reference +image_name: multi-provider +image_type: container +``` + +## Testing Custom Distributions + +### Local Testing + +#### 1. Run Locally with Docker + +```bash +# Set environment variables +export LLAMA_STACK_PORT=8321 +export INFERENCE_MODEL="llama3.2:1b" + +# Run the custom distribution +docker run -d \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ + distribution-custom-ollama:dev \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env OLLAMA_URL=http://host.docker.internal:11434 +``` + +#### 2. Test API Endpoints + +```bash +# Health check +curl http://localhost:8321/v1/health + +# List providers +curl http://localhost:8321/v1/providers + +# Test inference +curl -X POST http://localhost:8321/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "llama3.2:1b", + "prompt": "Hello, world!", + "max_tokens": 50 + }' +``` + +### Kubernetes Testing + +#### 1. Deploy to Test Namespace + +```bash +kubectl create namespace llama-test +kubectl apply -f custom-distribution.yaml -n llama-test +``` + +#### 2. Port Forward for Testing + +```bash +kubectl port-forward svc/my-custom-distribution-service 8321:8321 -n llama-test +``` + +#### 3. Run Tests + +```bash +# Test from within cluster +kubectl run test-pod --image=curlimages/curl --rm -it -- \ + curl http://my-custom-distribution-service:8321/v1/health +``` + +## Best Practices + +### Security + +1. **Use Private Registries**: Store custom images in private container registries +2. **Scan Images**: Use container scanning tools to check for vulnerabilities +3. **Minimal Base Images**: Use slim or distroless base images when possible +4. **Secrets Management**: Use Kubernetes secrets for API keys and credentials + +### Performance + +1. **Multi-stage Builds**: Use multi-stage Dockerfiles to reduce image size +2. **Layer Caching**: Optimize Dockerfile layer ordering for better caching +3. **Resource Limits**: Set appropriate CPU and memory limits +4. **GPU Optimization**: Configure GPU settings for inference workloads + +### Maintenance + +1. **Version Tags**: Use semantic versioning for your custom images +2. 
**Documentation**: Document your custom configurations and dependencies +3. **Testing**: Implement automated testing for custom distributions +4. **Monitoring**: Set up monitoring and logging for custom deployments + +### Development Workflow + +1. **Local Development**: Use conda/venv builds for rapid iteration +2. **CI/CD Integration**: Automate building and testing of custom distributions +3. **Staging Environment**: Test in staging before production deployment +4. **Rollback Strategy**: Maintain previous versions for quick rollbacks + +## Troubleshooting + +### Common Issues + +#### Build Failures + +```bash +# Check build logs +llama stack build --template ollama --image-type container --verbose + +# Verify dependencies +llama stack build --config my-build.yaml --print-deps-only +``` + +#### Runtime Issues + +```bash +# Check container logs +docker logs + +# Debug with interactive shell +docker run -it --entrypoint /bin/bash distribution-custom:dev +``` + +#### Kubernetes Issues + +```bash +# Check pod status +kubectl describe pod + +# View logs +kubectl logs -f + +# Check events +kubectl get events --sort-by=.metadata.creationTimestamp +``` + +### Getting Help + +1. **LlamaStack Documentation**: [Official docs](https://github.com/meta-llama/llama-stack) +2. **GitHub Issues**: Report bugs and ask questions +3. **Community Forums**: Join the LlamaStack community discussions +4. **Operator Documentation**: Check the Kubernetes operator guides + +## Next Steps + +- [vLLM Distribution](vllm.md) - Learn about vLLM-specific configurations +- [Ollama Distribution](ollama.md) - Explore Ollama distribution options +- [Configuration Reference](../reference/configuration.md) - Complete API reference +- [Scaling Guide](../how-to/scaling.md) - Scale your custom distributions diff --git a/docs/content/distributions/ollama.md b/docs/content/distributions/ollama.md new file mode 100644 index 000000000..5ffb01657 --- /dev/null +++ b/docs/content/distributions/ollama.md @@ -0,0 +1,733 @@ +# Ollama Distribution + +Ollama is a user-friendly platform for running large language models locally. The LlamaStack Kubernetes operator provides built-in support for Ollama through a pre-configured distribution. + +## Overview + +Ollama offers several advantages: + +- **Ease of Use**: Simple model management and deployment +- **Local Execution**: Run models entirely on your infrastructure +- **Model Library**: Access to a curated collection of popular models +- **Resource Efficiency**: Optimized for various hardware configurations +- **API Compatibility**: OpenAI-compatible API endpoints + +## Pre-Built Ollama Distribution + +The operator includes one pre-configured Ollama distribution: + +### ollama +- **Image**: `docker.io/llamastack/distribution-ollama:latest` +- **Purpose**: Standard Ollama deployment +- **Requirements**: CPU or GPU resources depending on model +- **Use Case**: General-purpose local LLM inference + +## Quick Start with Ollama + +### 1. Create a LlamaStackDistribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-ollama-distribution + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "ollama" # Use supported distribution + containerSpec: + port: 8321 + resources: + requests: + memory: "8Gi" + cpu: "4" + limits: + memory: "16Gi" + cpu: "8" + env: + - name: INFERENCE_MODEL + value: "llama3.2:1b" + storage: + size: "20Gi" + mountPath: "/.llama" +``` + +### 2. 
Deploy the Distribution + +```bash +kubectl apply -f ollama-distribution.yaml +``` + +### 3. Verify Deployment + +```bash +kubectl get llamastackdistribution my-ollama-distribution +kubectl get pods -l app=llama-stack +``` + +## Configuration Options + +### Container Specification + +The `containerSpec` section allows you to configure the container: + +```yaml +spec: + server: + containerSpec: + name: "llama-stack" # Optional, defaults to "llama-stack" + port: 8321 # Optional, defaults to 8321 + resources: + requests: + memory: "8Gi" + cpu: "4" + limits: + memory: "16Gi" + cpu: "8" + env: + - name: INFERENCE_MODEL + value: "llama3.2:1b" + - name: OLLAMA_HOST + value: "0.0.0.0:11434" + - name: OLLAMA_ORIGINS + value: "*" +``` + +### Environment Variables + +Configure Ollama behavior through environment variables: + +```yaml +env: + - name: INFERENCE_MODEL + value: "llama2:7b" + - name: OLLAMA_HOST + value: "0.0.0.0:11434" + - name: OLLAMA_ORIGINS + value: "*" + - name: OLLAMA_NUM_PARALLEL + value: "4" + - name: OLLAMA_MAX_LOADED_MODELS + value: "3" +``` + +### Popular Models + +You can specify different models using the `INFERENCE_MODEL` environment variable: + +```yaml +# Llama 2 variants +- name: INFERENCE_MODEL + value: "llama2:7b" # 7B parameter model +# value: "llama2:13b" # 13B parameter model +# value: "llama2:70b" # 70B parameter model + +# Code-focused models +# value: "codellama:7b" # Code generation +# value: "codellama:13b" # Larger code model + +# Chat-optimized models +# value: "llama2:7b-chat" +# value: "llama2:13b-chat" + +# Other popular models +# value: "mistral:7b" # Mistral 7B +# value: "neural-chat:7b" # Intel's neural chat +# value: "orca-mini:3b" # Smaller, efficient model +``` + +### Resource Requirements + +```yaml +resources: + requests: + memory: "8Gi" + cpu: "4" + limits: + memory: "16Gi" + cpu: "8" +``` + +### GPU Support + +For GPU acceleration: + +```yaml +resources: + requests: + nvidia.com/gpu: "1" + memory: "8Gi" + cpu: "2" + limits: + nvidia.com/gpu: "1" + memory: "16Gi" + cpu: "4" +env: + - name: INFERENCE_MODEL + value: "llama2:7b" + - name: OLLAMA_GPU_LAYERS + value: "35" # Number of layers to run on GPU +``` + +### Storage Configuration + +```yaml +storage: + size: "20Gi" + mountPath: "/.llama" # Optional, defaults to "/.llama" +``` + +## Advanced Configuration + +### Custom Model Management with Pod Overrides + +```yaml +spec: + server: + podOverrides: + volumes: + - name: ollama-models + persistentVolumeClaim: + claimName: ollama-models-pvc + volumeMounts: + - name: ollama-models + mountPath: /root/.ollama + containerSpec: + env: + - name: INFERENCE_MODEL + value: "llama3.2:1b" + - name: OLLAMA_MODELS + value: "/root/.ollama/models" +``` + +### Multiple Model Setup + +```yaml +spec: + server: + containerSpec: + env: + - name: INFERENCE_MODEL + value: "llama3.2:1b" # Primary model + - name: OLLAMA_MAX_LOADED_MODELS + value: "3" + - name: ADDITIONAL_MODELS + value: "codellama:7b,mistral:7b" # Additional models to pull + resources: + requests: + memory: "24Gi" + cpu: "8" + limits: + memory: "48Gi" + cpu: "16" +``` + +### Scaling with Multiple Replicas + +```yaml +spec: + replicas: 2 + server: + distribution: + name: "ollama" + containerSpec: + resources: + requests: + memory: "8Gi" + cpu: "4" + limits: + memory: "16Gi" + cpu: "8" +``` + +## Using Ollama with the Kubernetes Operator + +The LlamaStack Kubernetes operator supports Ollama in two ways: + +### 1. 
Pre-Built Distribution (Recommended) + +Use the pre-built, maintained distribution with the `distribution.name` field: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: ollama-distribution + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "ollama" # Supported distribution + containerSpec: + port: 8321 + resources: + requests: + memory: "8Gi" + cpu: "4" + limits: + memory: "16Gi" + cpu: "8" + env: + - name: INFERENCE_MODEL + value: "llama3.2:1b" + - name: OLLAMA_URL + value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434" + storage: + size: "20Gi" +``` + +#### With GPU Support + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: ollama-gpu-distribution + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "ollama" # Supported distribution + containerSpec: + resources: + requests: + nvidia.com/gpu: "1" + memory: "16Gi" + cpu: "8" + limits: + nvidia.com/gpu: "1" + memory: "32Gi" + cpu: "16" + env: + - name: INFERENCE_MODEL + value: "llama2:7b" + - name: OLLAMA_GPU_LAYERS + value: "35" + - name: OLLAMA_NUM_PARALLEL + value: "4" + storage: + size: "50Gi" +``` + +### 2. Bring Your Own (BYO) Custom Images + +Use custom-built distributions with the `distribution.image` field: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: custom-ollama-distribution + namespace: default +spec: + replicas: 1 + server: + distribution: + image: "my-registry.com/custom-ollama:v1.0.0" # Custom image + containerSpec: + resources: + requests: + memory: "16Gi" + cpu: "8" + limits: + memory: "32Gi" + cpu: "16" + env: + - name: INFERENCE_MODEL + value: "custom-model:latest" + - name: CUSTOM_OLLAMA_SETTING + value: "optimized" + storage: + size: "100Gi" +``` + +## Building Custom Ollama Distributions + +### Step 1: Build with LlamaStack CLI + +#### Option A: From Template + +```bash +# Install LlamaStack CLI +pip install llama-stack + +# Build from Ollama template +llama stack build --template ollama --image-type container --image-name my-ollama-dist +``` + +#### Option B: Custom Configuration + +Create `custom-ollama-build.yaml`: + +```yaml +name: custom-ollama +distribution_spec: + description: Custom Ollama distribution with pre-loaded models + providers: + inference: remote::ollama + memory: inline::faiss + safety: inline::llama-guard + agents: inline::meta-reference + telemetry: inline::meta-reference +image_name: custom-ollama +image_type: container +``` + +Build the distribution: + +```bash +llama stack build --config custom-ollama-build.yaml +``` + +### Step 2: Enhance with Custom Dockerfile + +Create `Dockerfile.enhanced`: + +```dockerfile +FROM distribution-custom-ollama:dev + +# Install additional tools +RUN apt-get update && apt-get install -y \ + curl \ + jq \ + htop \ + && rm -rf /var/lib/apt/lists/* + +# Pre-pull popular models +RUN ollama pull llama3.2:1b && \ + ollama pull llama3.2:3b && \ + ollama pull codellama:7b && \ + ollama pull mistral:7b + +# Add custom model management scripts +COPY scripts/model-manager.sh /usr/local/bin/model-manager +COPY scripts/health-check.sh /usr/local/bin/health-check +RUN chmod +x /usr/local/bin/model-manager /usr/local/bin/health-check + +# Add custom Ollama configuration +COPY ollama-config.json /etc/ollama/config.json + +# Set optimized environment variables +ENV OLLAMA_HOST=0.0.0.0:11434 +ENV OLLAMA_ORIGINS=* +ENV OLLAMA_NUM_PARALLEL=4 +ENV OLLAMA_MAX_LOADED_MODELS=3 
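+# NUM_PARALLEL bounds concurrent requests per loaded model, MAX_LOADED_MODELS caps resident models,
+# and KEEP_ALIVE below sets how long an idle model stays loaded in memory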
+ENV OLLAMA_KEEP_ALIVE=5m + +# Add health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD health-check + +EXPOSE 8321 11434 +``` + +### Step 3: Deploy with Operator + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: enhanced-ollama-dist + namespace: production +spec: + replicas: 2 + server: + distribution: + image: "my-registry.com/enhanced-ollama:v1.0.0" + containerSpec: + resources: + requests: + memory: "16Gi" + cpu: "8" + nvidia.com/gpu: "1" + limits: + memory: "32Gi" + cpu: "16" + nvidia.com/gpu: "1" + env: + - name: INFERENCE_MODEL + value: "llama3.2:3b" + - name: OLLAMA_NUM_PARALLEL + value: "4" + - name: OLLAMA_MAX_LOADED_MODELS + value: "2" + - name: CUSTOM_OPTIMIZATION + value: "enabled" + storage: + size: "200Gi" + podOverrides: + volumes: + - name: model-cache + persistentVolumeClaim: + claimName: shared-model-cache + volumeMounts: + - name: model-cache + mountPath: /shared-models +``` + +## Comparison: Pre-Built vs BYO + +| Aspect | Pre-Built Distribution | BYO Custom Images | +|--------|----------------------|-------------------| +| **Setup Complexity** | Simple - just specify `name` | Complex - build and maintain images | +| **Maintenance** | Maintained by LlamaStack team | You maintain the images | +| **Model Management** | Runtime model pulling | Pre-loaded models possible | +| **Customization** | Limited to environment variables | Full control over Ollama configuration | +| **Security** | Vetted by maintainers | You control security scanning and updates | +| **Performance** | Standard Ollama setup | Custom optimizations possible | +| **Support** | Community and official support | Self-supported | +| **Updates** | Automatic with operator updates | Manual image rebuilds required | + +### When to Use Pre-Built Distribution + +- **Quick deployment** and standard use cases +- **Production environments** where stability is key +- **Dynamic model management** (pull models at runtime) +- **Teams without container expertise** +- **Standard Ollama configurations** + +### When to Use BYO Custom Images + +- **Pre-loaded models** for faster startup +- **Custom Ollama configurations** or patches +- **Additional tools** and utilities +- **Compliance requirements** for image provenance +- **Integration** with existing model management systems +- **Custom model formats** or converters + +## Model Management + +### Accessing the Ollama Container + +```bash +# Connect to running Ollama pod +kubectl exec -it -- bash + +# Pull models +ollama pull llama2:7b + +# List available models +ollama list + +# Remove unused models +ollama rm old-model:tag +``` + +### Model Information + +```bash +# Show model details +kubectl exec -it -- ollama show llama2:7b + +# Check model size and parameters +kubectl exec -it -- ollama show llama2:7b --modelfile +``` + +## API Usage + +### REST API + +Ollama provides OpenAI-compatible endpoints: + +```bash +# Generate completion +curl -X POST http://ollama-service:8321/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "llama2:7b", + "prompt": "Why is the sky blue?", + "max_tokens": 100 + }' + +# Chat completion +curl -X POST http://ollama-service:8321/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "llama2:7b", + "messages": [ + {"role": "user", "content": "Hello!"} + ] + }' +``` + +### Python Client + +```python +import requests + +# Generate text +response = requests.post( + "http://ollama-service:8321/v1/completions", + 
json={ + "model": "llama2:7b", + "prompt": "Explain quantum computing", + "max_tokens": 200 + } +) + +print(response.json()) +``` + +## Monitoring and Troubleshooting + +### Health Checks + +```bash +# Check pod status +kubectl get pods -l app=llama-stack + +# View logs +kubectl logs -l app=llama-stack + +# Test API endpoint +kubectl port-forward svc/my-ollama-distribution-service 8321:8321 +curl http://localhost:8321/v1/health +``` + +### Performance Monitoring + +```bash +# Monitor resource usage +kubectl top pods -l app=llama-stack + +# Check model loading status +kubectl exec -it -- ollama ps +``` + +### Common Issues + +1. **Model Download Failures** + - Check internet connectivity + - Verify sufficient storage space + - Ensure proper permissions + +2. **Out of Memory** + - Use smaller models (3b, 7b instead of 13b, 70b) + - Increase memory limits + - Reduce concurrent requests + +3. **Slow Performance** + - Enable GPU acceleration + - Use faster storage for model cache + - Optimize model selection for hardware + +## Best Practices + +### Resource Planning + +- **Memory**: Allocate 2-4x model size in RAM +- **Storage**: Plan for model downloads and cache +- **CPU**: More cores improve concurrent request handling + +### Model Selection + +```yaml +# For development/testing +env: + - name: INFERENCE_MODEL + value: "orca-mini:3b" # Fast, lightweight + +# For general use +env: + - name: INFERENCE_MODEL + value: "llama2:7b" # Good balance of quality/performance + +# For high-quality responses +env: + - name: INFERENCE_MODEL + value: "llama2:13b" # Better quality, more resources + +# For code generation +env: + - name: INFERENCE_MODEL + value: "codellama:7b" # Specialized for coding tasks +``` + +### Security Considerations + +- Use private registries for custom images +- Implement network policies for API access +- Secure model storage with appropriate permissions +- Monitor API usage and implement rate limiting + +## Examples + +### Production Setup + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: production-ollama + namespace: llama-production +spec: + replicas: 2 + server: + distribution: + name: "ollama" + containerSpec: + resources: + requests: + memory: "16Gi" + cpu: "8" + nvidia.com/gpu: "1" + limits: + memory: "32Gi" + cpu: "16" + nvidia.com/gpu: "1" + env: + - name: INFERENCE_MODEL + value: "llama3.2:1b" + - name: OLLAMA_NUM_PARALLEL + value: "4" + - name: OLLAMA_MAX_LOADED_MODELS + value: "2" + storage: + size: "100Gi" +``` + +### Development Setup + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: dev-ollama + namespace: development +spec: + replicas: 1 + server: + distribution: + name: "ollama" + containerSpec: + resources: + requests: + memory: "4Gi" + cpu: "2" + limits: + memory: "8Gi" + cpu: "4" + env: + - name: INFERENCE_MODEL + value: "orca-mini:3b" + storage: + size: "20Gi" +``` + +## API Reference + +For complete API documentation, see: +- [API Reference](../reference/api.md) +- [Configuration Reference](../reference/configuration.md) + +## Next Steps + +- [Configure Storage](../how-to/configure-storage.md) +- [Scaling Guide](../how-to/scaling.md) +- [Monitoring Setup](../how-to/monitoring.md) +- [vLLM Distribution](vllm.md) +- [Understanding Distributions](../getting-started/distributions.md) diff --git a/docs/content/distributions/starter.md b/docs/content/distributions/starter.md new file mode 100644 index 000000000..c5114da4d --- /dev/null +++ 
b/docs/content/distributions/starter.md @@ -0,0 +1,363 @@ +# Starter Distribution + +The **Starter** distribution is the recommended default distribution for new users of the LlamaStack Kubernetes Operator. It provides a general-purpose LlamaStack deployment that's easy to set up and suitable for most use cases. + +## Overview + +The Starter distribution is designed to: + +- **Get you started quickly** with minimal configuration +- **Provide a stable foundation** for LlamaStack applications +- **Support common use cases** out of the box +- **Serve as a learning platform** for understanding LlamaStack concepts + +## Distribution Details + +| Property | Value | +|----------|-------| +| **Distribution Name** | `starter` | +| **Image** | `docker.io/llamastack/distribution-starter:latest` | +| **Use Case** | General-purpose LlamaStack deployment | +| **Requirements** | Basic Kubernetes resources | +| **Recommended For** | New users, development, prototyping | + +## Quick Start + +### 1. Create a Basic Starter Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-starter-llamastack + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "starter" + containerSpec: + port: 8321 + resources: + requests: + memory: "2Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "1" + storage: + size: "20Gi" +``` + +### 2. Deploy the Distribution + +```bash +kubectl apply -f starter-distribution.yaml +``` + +### 3. Verify Deployment + +```bash +# Check the distribution status +kubectl get llamastackdistribution my-starter-llamastack + +# Check the pods +kubectl get pods -l app=llama-stack + +# Check the service +kubectl get svc my-starter-llamastack-service +``` + +## Configuration Options + +### Basic Configuration + +```yaml +spec: + replicas: 1 + server: + distribution: + name: "starter" + containerSpec: + port: 8321 + resources: + requests: + memory: "2Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "1" + env: + - name: LOG_LEVEL + value: "INFO" + storage: + size: "20Gi" + mountPath: "/.llama" +``` + +### Environment Variables + +Common environment variables for the Starter distribution: + +```yaml +env: + - name: LOG_LEVEL + value: "INFO" # DEBUG, INFO, WARNING, ERROR + - name: SERVER_PORT + value: "8321" + - name: ENABLE_TELEMETRY + value: "true" +``` + +### Resource Requirements + +#### Development Setup +```yaml +resources: + requests: + memory: "1Gi" + cpu: "250m" + limits: + memory: "2Gi" + cpu: "500m" +``` + +#### Production Setup +```yaml +resources: + requests: + memory: "4Gi" + cpu: "1" + limits: + memory: "8Gi" + cpu: "2" +``` + +## Advanced Configuration + +### Using ConfigMaps + +You can provide custom configuration using ConfigMaps: + +```yaml +spec: + server: + distribution: + name: "starter" + userConfig: + configMapName: "my-llamastack-config" + configMapNamespace: "default" # Optional, defaults to same namespace +``` + +Create the ConfigMap: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: my-llamastack-config + namespace: default +data: + run.yaml: | + built_with: llama-stack-0.0.53 + called_from: /tmp + distribution: + description: Built by `llama stack build` from `starter` template + name: starter + providers: + agents: meta-reference + inference: meta-reference + memory: meta-reference + safety: meta-reference + telemetry: meta-reference + image_name: starter +``` + +### Scaling + +Scale your Starter distribution horizontally: + +```yaml +spec: + replicas: 3 + server: + distribution: + name: 
"starter" + containerSpec: + resources: + requests: + memory: "2Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "1" +``` + +### Custom Storage + +Configure persistent storage for your data: + +```yaml +spec: + server: + storage: + size: "50Gi" + mountPath: "/.llama" +``` + +## Use Cases + +### 1. Learning and Development + +Perfect for developers new to LlamaStack: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: learning-llamastack + namespace: development +spec: + replicas: 1 + server: + distribution: + name: "starter" + containerSpec: + resources: + requests: + memory: "1Gi" + cpu: "250m" + limits: + memory: "2Gi" + cpu: "500m" + env: + - name: LOG_LEVEL + value: "DEBUG" + storage: + size: "10Gi" +``` + +### 2. Prototyping Applications + +For building and testing LlamaStack applications: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: prototype-llamastack + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "starter" + containerSpec: + resources: + requests: + memory: "2Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "1" + storage: + size: "20Gi" +``` + +### 3. Small Production Workloads + +For lightweight production deployments: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: production-starter + namespace: production +spec: + replicas: 2 + server: + distribution: + name: "starter" + containerSpec: + resources: + requests: + memory: "4Gi" + cpu: "1" + limits: + memory: "8Gi" + cpu: "2" + env: + - name: LOG_LEVEL + value: "WARNING" + - name: ENABLE_TELEMETRY + value: "true" + storage: + size: "100Gi" +``` + +## Monitoring and Troubleshooting + +### Health Checks + +Check the health of your Starter distribution: + +```bash +# Check pod status +kubectl get pods -l app=llama-stack + +# View logs +kubectl logs -l app=llama-stack + +# Check service endpoints +kubectl get svc -l app=llama-stack +``` + +### Common Issues + +1. **Pod Not Starting** + - Check resource availability in your cluster + - Verify image pull permissions + - Review pod events: `kubectl describe pod ` + +2. **Service Not Accessible** + - Verify service creation: `kubectl get svc` + - Check port configuration + - Ensure network policies allow traffic + +3. **Storage Issues** + - Verify PVC creation: `kubectl get pvc` + - Check storage class availability + - Ensure sufficient cluster storage + +## Best Practices + +### Resource Planning +- Start with minimal resources and scale up as needed +- Monitor resource usage with `kubectl top pods` +- Use resource requests to ensure scheduling + +### Configuration Management +- Use ConfigMaps for complex configurations +- Store sensitive data in Secrets +- Version your configuration files + +### Monitoring +- Enable telemetry for production deployments +- Set up log aggregation +- Monitor pod health and resource usage + +## Next Steps + +Once you're comfortable with the Starter distribution, consider: + +1. **[Ollama Distribution](ollama.md)** - For local inference with Ollama +2. **[vLLM Distribution](vllm.md)** - For high-performance GPU inference +3. **[Bedrock Distribution](bedrock.md)** - For AWS Bedrock integration +4. 
**[Custom Images](bring-your-own.md)** - For specialized requirements + +## API Reference + +For complete API documentation, see: +- [API Reference](../reference/api.md) +- [Configuration Reference](../reference/configuration.md) diff --git a/docs/content/distributions/tgi.md b/docs/content/distributions/tgi.md new file mode 100644 index 000000000..c01fb1e69 --- /dev/null +++ b/docs/content/distributions/tgi.md @@ -0,0 +1,519 @@ +# Text Generation Inference (TGI) Distribution + +!!! warning "Distribution Availability" + The TGI distribution container image may not be currently maintained or available. + Please verify the image exists at `docker.io/llamastack/distribution-tgi:latest` before using this distribution. + For production use, consider using the `ollama` or `vllm` distributions which are actively maintained. + +The **TGI** distribution integrates with Hugging Face's Text Generation Inference (TGI) server, providing high-performance inference for large language models with optimized serving capabilities. + +## Overview + +Text Generation Inference (TGI) is Hugging Face's solution for deploying and serving Large Language Models. The TGI distribution: + +- **Connects to TGI servers** for optimized model inference +- **Supports streaming responses** for real-time applications +- **Provides high throughput** with batching and optimization +- **Compatible with Hugging Face models** from the Hub + +## Distribution Details + +| Property | Value | +|----------|-------| +| **Distribution Name** | `tgi` | +| **Image** | `docker.io/llamastack/distribution-tgi:latest` | +| **Use Case** | Hugging Face TGI server integration | +| **Requirements** | TGI server endpoint | +| **Recommended For** | High-performance inference, Hugging Face ecosystem | + +## Prerequisites + +### 1. TGI Server Setup + +You need a running TGI server. You can deploy one using: + +#### Option A: Deploy TGI in Kubernetes +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tgi-server +spec: + replicas: 1 + selector: + matchLabels: + app: tgi-server + template: + metadata: + labels: + app: tgi-server + spec: + containers: + - name: tgi + image: ghcr.io/huggingface/text-generation-inference:latest + ports: + - containerPort: 80 + env: + - name: MODEL_ID + value: "microsoft/DialoGPT-medium" + - name: PORT + value: "80" + resources: + requests: + memory: "4Gi" + cpu: "2" + limits: + memory: "8Gi" + cpu: "4" +--- +apiVersion: v1 +kind: Service +metadata: + name: tgi-server-service +spec: + selector: + app: tgi-server + ports: + - port: 80 + targetPort: 80 +``` + +#### Option B: External TGI Server +Use an existing TGI deployment (cloud or on-premises). + +## Quick Start + +### 1. Create TGI Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-tgi-llamastack + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "tgi" + containerSpec: + port: 8321 + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "1" + env: + - name: TGI_URL + value: "http://tgi-server-service:80" + - name: MODEL_ID + value: "microsoft/DialoGPT-medium" + storage: + size: "10Gi" +``` + +### 2. Deploy the Distribution + +```bash +kubectl apply -f tgi-distribution.yaml +``` + +### 3. 
Verify Deployment + +```bash +# Check the distribution status +kubectl get llamastackdistribution my-tgi-llamastack + +# Check the pods +kubectl get pods -l app=llama-stack + +# Test TGI connectivity +kubectl logs -l app=llama-stack +``` + +## Configuration Options + +### Environment Variables + +Configure TGI connection and behavior: + +```yaml +env: + - name: TGI_URL + value: "http://tgi-server-service:80" + - name: MODEL_ID + value: "microsoft/DialoGPT-medium" + - name: TGI_TIMEOUT + value: "30" # Request timeout in seconds + - name: TGI_MAX_TOKENS + value: "512" + - name: TGI_TEMPERATURE + value: "0.7" + - name: TGI_TOP_P + value: "0.9" + - name: LOG_LEVEL + value: "INFO" +``` + +### TGI Server Configuration + +Common TGI server models and configurations: + +#### Small Models (Development) +```yaml +env: + - name: TGI_URL + value: "http://tgi-server:80" + - name: MODEL_ID + value: "microsoft/DialoGPT-small" # ~117M parameters + - name: TGI_MAX_TOKENS + value: "256" +``` + +#### Medium Models (Production) +```yaml +env: + - name: TGI_URL + value: "http://tgi-server:80" + - name: MODEL_ID + value: "microsoft/DialoGPT-medium" # ~345M parameters + - name: TGI_MAX_TOKENS + value: "512" +``` + +#### Large Models (High Performance) +```yaml +env: + - name: TGI_URL + value: "http://tgi-server:80" + - name: MODEL_ID + value: "microsoft/DialoGPT-large" # ~762M parameters + - name: TGI_MAX_TOKENS + value: "1024" +``` + +### Resource Requirements + +#### Lightweight Setup +```yaml +resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" +``` + +#### Standard Setup +```yaml +resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "1" +``` + +#### High-Performance Setup +```yaml +resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "4Gi" + cpu: "2" +``` + +## Advanced Configuration + +### Multiple TGI Servers + +Connect to multiple TGI servers for load balancing: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: multi-tgi-llamastack +spec: + replicas: 2 + server: + distribution: + name: "tgi" + containerSpec: + env: + - name: TGI_URLS + value: "http://tgi-server-1:80,http://tgi-server-2:80" + - name: TGI_LOAD_BALANCE + value: "round_robin" # round_robin, random, least_connections +``` + +### TGI with GPU Support + +For GPU-accelerated TGI servers: + +```yaml +# TGI Server with GPU +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tgi-gpu-server +spec: + template: + spec: + containers: + - name: tgi + image: ghcr.io/huggingface/text-generation-inference:latest + env: + - name: MODEL_ID + value: "meta-llama/Llama-2-7b-chat-hf" + - name: CUDA_VISIBLE_DEVICES + value: "0" + resources: + requests: + nvidia.com/gpu: "1" + memory: "16Gi" + limits: + nvidia.com/gpu: "1" + memory: "32Gi" +``` + +### Custom TGI Configuration + +Use ConfigMaps for complex TGI configurations: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: tgi-config +data: + tgi-settings.json: | + { + "max_concurrent_requests": 128, + "max_best_of": 2, + "max_stop_sequences": 4, + "max_input_length": 1024, + "max_total_tokens": 2048, + "waiting_served_ratio": 1.2, + "max_batch_prefill_tokens": 4096, + "max_batch_total_tokens": 8192 + } +--- +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: custom-tgi-llamastack +spec: + server: + distribution: + name: "tgi" + containerSpec: + env: + - name: TGI_CONFIG_FILE + value: "/config/tgi-settings.json" + 
podOverrides: + volumes: + - name: tgi-config + configMap: + name: tgi-config + volumeMounts: + - name: tgi-config + mountPath: /config +``` + +## Use Cases + +### 1. Development Environment + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: dev-tgi + namespace: development +spec: + replicas: 1 + server: + distribution: + name: "tgi" + containerSpec: + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + env: + - name: TGI_URL + value: "http://tgi-dev-server:80" + - name: MODEL_ID + value: "microsoft/DialoGPT-small" + - name: LOG_LEVEL + value: "DEBUG" + storage: + size: "5Gi" +``` + +### 2. Production Deployment + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: production-tgi + namespace: production +spec: + replicas: 3 + server: + distribution: + name: "tgi" + containerSpec: + resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "4Gi" + cpu: "2" + env: + - name: TGI_URL + value: "http://tgi-prod-server:80" + - name: MODEL_ID + value: "meta-llama/Llama-2-7b-chat-hf" + - name: TGI_MAX_TOKENS + value: "1024" + - name: TGI_TEMPERATURE + value: "0.7" + - name: ENABLE_TELEMETRY + value: "true" + storage: + size: "50Gi" +``` + +### 3. High-Throughput Setup + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: high-throughput-tgi + namespace: production +spec: + replicas: 5 + server: + distribution: + name: "tgi" + containerSpec: + resources: + requests: + memory: "4Gi" + cpu: "2" + limits: + memory: "8Gi" + cpu: "4" + env: + - name: TGI_URLS + value: "http://tgi-1:80,http://tgi-2:80,http://tgi-3:80" + - name: TGI_LOAD_BALANCE + value: "least_connections" + - name: TGI_TIMEOUT + value: "60" + - name: TGI_MAX_CONCURRENT_REQUESTS + value: "256" +``` + +## Monitoring and Troubleshooting + +### Health Checks + +```bash +# Check distribution status +kubectl get llamastackdistribution + +# Check TGI connectivity +kubectl logs -l app=llama-stack | grep -i tgi + +# Test TGI server directly +kubectl exec -it -- curl http://tgi-server:80/health +``` + +### Performance Monitoring + +```bash +# Monitor resource usage +kubectl top pods -l app=llama-stack + +# Check TGI server metrics +kubectl exec -it -- curl http://localhost:80/metrics + +# Monitor request latency +kubectl logs -l app=llama-stack | grep -i "response_time" +``` + +### Common Issues + +1. **TGI Server Unreachable** + ```bash + # Check TGI server status + kubectl get pods -l app=tgi-server + kubectl logs -l app=tgi-server + + # Test connectivity + kubectl exec -it -- curl http://tgi-server:80/health + ``` + +2. **Model Loading Failures** + - Verify model ID exists on Hugging Face Hub + - Check TGI server has sufficient resources + - Ensure model is compatible with TGI + +3. 
**Timeout Issues** + - Increase `TGI_TIMEOUT` value + - Check TGI server performance + - Monitor network latency + +## Best Practices + +### Performance Optimization +- Use appropriate batch sizes for your workload +- Configure TGI server with optimal parameters +- Monitor and tune timeout values +- Use multiple TGI servers for high availability + +### Resource Management +- Size TGI servers based on model requirements +- Monitor GPU utilization if using GPU acceleration +- Scale LlamaStack replicas based on request volume +- Use resource requests and limits + +### Security +- Secure TGI server endpoints with authentication +- Use network policies to restrict access +- Monitor API usage and implement rate limiting +- Keep TGI server images updated + +### Model Management +- Version your models and TGI configurations +- Test model changes in development first +- Monitor model performance and accuracy +- Have rollback procedures for model updates + +## Next Steps + +- [Configure Scaling](../how-to-guides/scaling.md) +- [Set up Monitoring](../how-to-guides/monitoring.md) +- [Security Configuration](../how-to-guides/security.md) +- [Performance Tuning](../how-to-guides/performance.md) + +## API Reference + +For complete API documentation, see: +- [API Reference](../reference/api.md) +- [Configuration Reference](../reference/configuration.md) diff --git a/docs/content/distributions/together.md b/docs/content/distributions/together.md new file mode 100644 index 000000000..31cf59337 --- /dev/null +++ b/docs/content/distributions/together.md @@ -0,0 +1,553 @@ +# Together AI Distribution + +!!! warning "Distribution Availability" + The Together distribution container image may not be currently maintained or available. + Please verify the image exists at `docker.io/llamastack/distribution-together:latest` before using this distribution. + For production use, consider using the `ollama` or `vllm` distributions which are actively maintained. + +The **Together** distribution integrates with Together AI's inference platform, providing access to a wide variety of open-source models through their optimized API service. + +## Overview + +Together AI offers fast, scalable inference for open-source language models. The Together distribution: + +- **Connects to Together AI API** for model inference +- **Supports multiple open-source models** (Llama, Mistral, CodeLlama, etc.) +- **Provides high-performance inference** with optimized serving +- **Offers cost-effective scaling** with pay-per-use pricing + +## Distribution Details + +| Property | Value | +|----------|-------| +| **Distribution Name** | `together` | +| **Image** | `docker.io/llamastack/distribution-together:latest` | +| **Use Case** | Together AI API integration | +| **Requirements** | Together AI API key | +| **Recommended For** | Open-source models, cost-effective inference | + +## Prerequisites + +### 1. Together AI Account + +- Sign up at [together.ai](https://together.ai) +- Get your API key from the dashboard +- Choose your preferred models + +### 2. API Key Setup + +Create a Kubernetes secret with your Together AI API key: + +```bash +kubectl create secret generic together-api-key \ + --from-literal=TOGETHER_API_KEY=your-api-key-here +``` + +## Quick Start + +### 1. 
Create Together Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-together-llamastack + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "together" + containerSpec: + port: 8321 + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "1" + env: + - name: TOGETHER_API_KEY + valueFrom: + secretKeyRef: + name: together-api-key + key: TOGETHER_API_KEY + - name: TOGETHER_MODEL + value: "meta-llama/Llama-2-7b-chat-hf" + storage: + size: "10Gi" +``` + +### 2. Deploy the Distribution + +```bash +kubectl apply -f together-distribution.yaml +``` + +### 3. Verify Deployment + +```bash +# Check the distribution status +kubectl get llamastackdistribution my-together-llamastack + +# Check the pods +kubectl get pods -l app=llama-stack + +# Check logs for Together AI connectivity +kubectl logs -l app=llama-stack +``` + +## Configuration Options + +### Supported Models + +Together AI supports many popular open-source models: + +#### Meta Llama Models +```yaml +env: + - name: TOGETHER_MODEL + value: "meta-llama/Llama-2-7b-chat-hf" + # value: "meta-llama/Llama-2-13b-chat-hf" + # value: "meta-llama/Llama-2-70b-chat-hf" + # value: "meta-llama/CodeLlama-7b-Instruct-hf" + # value: "meta-llama/CodeLlama-13b-Instruct-hf" +``` + +#### Mistral Models +```yaml +env: + - name: TOGETHER_MODEL + value: "mistralai/Mistral-7B-Instruct-v0.1" + # value: "mistralai/Mixtral-8x7B-Instruct-v0.1" +``` + +#### Other Popular Models +```yaml +env: + - name: TOGETHER_MODEL + value: "togethercomputer/RedPajama-INCITE-7B-Chat" + # value: "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO" + # value: "teknium/OpenHermes-2.5-Mistral-7B" +``` + +### Environment Variables + +Configure Together AI connection and model parameters: + +```yaml +env: + - name: TOGETHER_API_KEY + valueFrom: + secretKeyRef: + name: together-api-key + key: TOGETHER_API_KEY + - name: TOGETHER_MODEL + value: "meta-llama/Llama-2-7b-chat-hf" + - name: TOGETHER_MAX_TOKENS + value: "512" + - name: TOGETHER_TEMPERATURE + value: "0.7" + - name: TOGETHER_TOP_P + value: "0.9" + - name: TOGETHER_TOP_K + value: "50" + - name: TOGETHER_REPETITION_PENALTY + value: "1.0" + - name: TOGETHER_TIMEOUT + value: "30" # Request timeout in seconds + - name: LOG_LEVEL + value: "INFO" +``` + +### Resource Requirements + +#### Development Setup +```yaml +resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" +``` + +#### Production Setup +```yaml +resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "1" +``` + +#### High-Throughput Setup +```yaml +resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "4Gi" + cpu: "2" +``` + +## Advanced Configuration + +### Multiple Models + +Deploy different distributions for different models: + +```yaml +# Llama 2 7B for general chat +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: together-llama2-7b +spec: + server: + distribution: + name: "together" + containerSpec: + env: + - name: TOGETHER_MODEL + value: "meta-llama/Llama-2-7b-chat-hf" +--- +# CodeLlama for code generation +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: together-codellama +spec: + server: + distribution: + name: "together" + containerSpec: + env: + - name: TOGETHER_MODEL + value: "meta-llama/CodeLlama-7b-Instruct-hf" +``` + +### Production Configuration + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: 
LlamaStackDistribution +metadata: + name: production-together + namespace: production +spec: + replicas: 3 + server: + distribution: + name: "together" + containerSpec: + resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "4Gi" + cpu: "2" + env: + - name: TOGETHER_API_KEY + valueFrom: + secretKeyRef: + name: together-api-key + key: TOGETHER_API_KEY + - name: TOGETHER_MODEL + value: "meta-llama/Llama-2-13b-chat-hf" + - name: TOGETHER_MAX_TOKENS + value: "1024" + - name: TOGETHER_TEMPERATURE + value: "0.7" + - name: TOGETHER_TIMEOUT + value: "60" + - name: LOG_LEVEL + value: "WARNING" + - name: ENABLE_TELEMETRY + value: "true" + storage: + size: "20Gi" +``` + +### Custom Configuration with ConfigMap + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: together-config +data: + together-settings.json: | + { + "default_model": "meta-llama/Llama-2-7b-chat-hf", + "max_tokens": 512, + "temperature": 0.7, + "top_p": 0.9, + "top_k": 50, + "repetition_penalty": 1.0, + "stop_sequences": ["", "[INST]", "[/INST]"], + "retry_config": { + "max_retries": 3, + "backoff_factor": 2, + "max_backoff": 60 + } + } +--- +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: custom-together +spec: + server: + distribution: + name: "together" + containerSpec: + env: + - name: TOGETHER_CONFIG_FILE + value: "/config/together-settings.json" + podOverrides: + volumes: + - name: together-config + configMap: + name: together-config + volumeMounts: + - name: together-config + mountPath: /config +``` + +## Use Cases + +### 1. Development and Prototyping + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: dev-together + namespace: development +spec: + replicas: 1 + server: + distribution: + name: "together" + containerSpec: + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + env: + - name: TOGETHER_API_KEY + valueFrom: + secretKeyRef: + name: together-api-key + key: TOGETHER_API_KEY + - name: TOGETHER_MODEL + value: "meta-llama/Llama-2-7b-chat-hf" + - name: TOGETHER_MAX_TOKENS + value: "256" + - name: LOG_LEVEL + value: "DEBUG" + storage: + size: "5Gi" +``` + +### 2. Code Generation Service + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: code-generation-together + namespace: default +spec: + replicas: 2 + server: + distribution: + name: "together" + containerSpec: + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "1" + env: + - name: TOGETHER_API_KEY + valueFrom: + secretKeyRef: + name: together-api-key + key: TOGETHER_API_KEY + - name: TOGETHER_MODEL + value: "meta-llama/CodeLlama-13b-Instruct-hf" + - name: TOGETHER_MAX_TOKENS + value: "2048" + - name: TOGETHER_TEMPERATURE + value: "0.1" # Lower temperature for code + storage: + size: "15Gi" +``` + +### 3. 
High-Volume Production + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: high-volume-together + namespace: production +spec: + replicas: 5 + server: + distribution: + name: "together" + containerSpec: + resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "4Gi" + cpu: "2" + env: + - name: TOGETHER_API_KEY + valueFrom: + secretKeyRef: + name: together-api-key + key: TOGETHER_API_KEY + - name: TOGETHER_MODEL + value: "meta-llama/Llama-2-70b-chat-hf" + - name: TOGETHER_MAX_TOKENS + value: "1024" + - name: TOGETHER_TIMEOUT + value: "120" + - name: ENABLE_TELEMETRY + value: "true" + storage: + size: "50Gi" +``` + +## Monitoring and Troubleshooting + +### Health Checks + +```bash +# Check distribution status +kubectl get llamastackdistribution + +# Check API connectivity +kubectl logs -l app=llama-stack | grep -i together + +# Test API key +kubectl exec -it -- curl -H "Authorization: Bearer $TOGETHER_API_KEY" \ + https://api.together.xyz/v1/models +``` + +### Performance Monitoring + +```bash +# Monitor resource usage +kubectl top pods -l app=llama-stack + +# Check API response times +kubectl logs -l app=llama-stack | grep -i "response_time" + +# Monitor API usage +kubectl logs -l app=llama-stack | grep -i "api_usage" +``` + +### Common Issues + +1. **Invalid API Key** + ```bash + # Verify API key in secret + kubectl get secret together-api-key -o yaml + + # Test API key manually + kubectl exec -it -- env | grep TOGETHER_API_KEY + ``` + +2. **Model Not Available** + - Check if model exists in Together AI catalog + - Verify model name spelling and format + - Some models may have usage restrictions + +3. **Rate Limiting** + - Monitor API usage and limits + - Implement request queuing + - Consider upgrading Together AI plan + +4. 
**Timeout Issues** + - Increase `TOGETHER_TIMEOUT` value + - Check network connectivity + - Monitor Together AI service status + +## Best Practices + +### Cost Optimization +- Choose appropriate models for your use case +- Monitor token usage and optimize prompts +- Use smaller models for development/testing +- Implement caching for repeated requests +- Set up usage alerts and budgets + +### Performance +- Scale replicas based on request volume +- Use connection pooling and keep-alive +- Implement request batching where possible +- Monitor and optimize timeout values + +### Security +- Store API keys in Kubernetes Secrets +- Use least-privilege access controls +- Monitor API usage for anomalies +- Rotate API keys regularly +- Implement rate limiting and request validation + +### Reliability +- Implement retry logic with exponential backoff +- Use multiple replicas for high availability +- Monitor Together AI service status +- Have fallback mechanisms for service outages + +## Cost Management + +### Usage Monitoring +```yaml +env: + - name: ENABLE_USAGE_TRACKING + value: "true" + - name: USAGE_LOG_LEVEL + value: "INFO" + - name: COST_ALERT_THRESHOLD + value: "100" # Alert when daily cost exceeds $100 +``` + +### Budget Controls +- Set up billing alerts in Together AI dashboard +- Implement request quotas per user/application +- Monitor token usage patterns +- Use smaller models for non-critical workloads + +## Next Steps + +- [Configure Monitoring](../how-to-guides/monitoring.md) +- [Set up Scaling](../how-to-guides/scaling.md) +- [Security Best Practices](../how-to-guides/security.md) +- [Cost Optimization](../how-to-guides/cost-optimization.md) + +## API Reference + +For complete API documentation, see: +- [API Reference](../reference/api.md) +- [Configuration Reference](../reference/configuration.md) +- [Together AI API Documentation](https://docs.together.ai/) diff --git a/docs/content/distributions/vllm.md b/docs/content/distributions/vllm.md new file mode 100644 index 000000000..a1afd214d --- /dev/null +++ b/docs/content/distributions/vllm.md @@ -0,0 +1,613 @@ +# vLLM Distribution + +vLLM is a high-performance inference engine optimized for large language models. The LlamaStack Kubernetes operator provides built-in support for vLLM through pre-configured distributions. + +## Overview + +vLLM offers excellent performance characteristics: + +- **High Throughput**: Optimized for serving multiple concurrent requests +- **Memory Efficiency**: Advanced memory management and attention mechanisms +- **GPU Acceleration**: Native CUDA support for NVIDIA GPUs +- **Model Compatibility**: Supports a wide range of popular model architectures + +## Pre-Built vLLM Distributions + +The operator includes two pre-built vLLM distributions: + +### vllm-gpu (Self-Hosted) +- **Image**: `docker.io/llamastack/distribution-vllm-gpu:latest` +- **Purpose**: GPU-accelerated vLLM inference with local model serving +- **Requirements**: NVIDIA GPU with CUDA support +- **Infrastructure**: You provide GPU infrastructure +- **Use Case**: High-performance inference for production workloads + +### remote-vllm (External Connection) +- **Image**: `docker.io/llamastack/distribution-remote-vllm:latest` +- **Purpose**: Connect to external vLLM server +- **Requirements**: Access to external vLLM endpoint +- **Infrastructure**: External vLLM server required +- **Use Case**: Using existing vLLM deployments or managed services + +## Quick Start with vLLM + +### 1. 
Create a LlamaStackDistribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-vllm-distribution + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "vllm-gpu" # Use supported distribution + containerSpec: + port: 8321 + resources: + requests: + nvidia.com/gpu: "1" + memory: "16Gi" + cpu: "4" + limits: + nvidia.com/gpu: "1" + memory: "32Gi" + cpu: "8" + env: + - name: INFERENCE_MODEL + value: "meta-llama/Llama-2-7b-chat-hf" + storage: + size: "50Gi" + mountPath: "/.llama" +``` + +### 2. Deploy the Distribution + +```bash +kubectl apply -f vllm-distribution.yaml +``` + +### 3. Verify Deployment + +```bash +kubectl get llamastackdistribution my-vllm-distribution +kubectl get pods -l app=llama-stack +``` + +## Configuration Options + +### Container Specification + +The `containerSpec` section allows you to configure the container: + +```yaml +spec: + server: + containerSpec: + name: "llama-stack" # Optional, defaults to "llama-stack" + port: 8321 # Optional, defaults to 8321 + resources: + requests: + nvidia.com/gpu: "1" + memory: "16Gi" + cpu: "4" + limits: + nvidia.com/gpu: "1" + memory: "32Gi" + cpu: "8" + env: + - name: INFERENCE_MODEL + value: "meta-llama/Llama-2-7b-chat-hf" + - name: VLLM_GPU_MEMORY_UTILIZATION + value: "0.9" + - name: VLLM_MAX_SEQ_LEN + value: "4096" +``` + +### Environment Variables + +Configure vLLM behavior through environment variables: + +```yaml +env: + - name: INFERENCE_MODEL + value: "meta-llama/Llama-2-7b-chat-hf" + - name: VLLM_GPU_MEMORY_UTILIZATION + value: "0.9" + - name: VLLM_MAX_SEQ_LEN + value: "4096" + - name: VLLM_MAX_BATCH_SIZE + value: "32" + - name: VLLM_TENSOR_PARALLEL_SIZE + value: "1" +``` + +### Resource Requirements + +```yaml +resources: + requests: + nvidia.com/gpu: "1" + memory: "16Gi" + cpu: "4" + limits: + nvidia.com/gpu: "1" + memory: "32Gi" + cpu: "8" +``` + +### Storage Configuration + +```yaml +storage: + size: "50Gi" + mountPath: "/.llama" # Optional, defaults to "/.llama" +``` + +## Advanced Configuration + +### Multi-GPU Setup + +For larger models requiring multiple GPUs: + +```yaml +spec: + server: + containerSpec: + resources: + requests: + nvidia.com/gpu: "4" + memory: "64Gi" + cpu: "16" + limits: + nvidia.com/gpu: "4" + memory: "128Gi" + cpu: "32" + env: + - name: INFERENCE_MODEL + value: "meta-llama/Llama-2-70b-chat-hf" + - name: VLLM_TENSOR_PARALLEL_SIZE + value: "4" +``` + +### Custom Volumes with Pod Overrides + +```yaml +spec: + server: + podOverrides: + volumes: + - name: model-cache + persistentVolumeClaim: + claimName: model-cache-pvc + volumeMounts: + - name: model-cache + mountPath: /models + containerSpec: + env: + - name: INFERENCE_MODEL + value: "/models/custom-llama-model" +``` + +### Scaling with Multiple Replicas + +```yaml +spec: + replicas: 3 + server: + distribution: + name: "vllm-gpu" + containerSpec: + resources: + requests: + nvidia.com/gpu: "1" + memory: "16Gi" + limits: + nvidia.com/gpu: "1" + memory: "32Gi" +``` + +## Using vLLM with the Kubernetes Operator + +The LlamaStack Kubernetes operator supports vLLM in two ways: + +### 1. 
Pre-Built Distributions (Recommended) + +Use pre-built, maintained distributions with the `distribution.name` field: + +#### vllm-gpu Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: vllm-gpu-distribution + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "vllm-gpu" # Supported distribution + containerSpec: + resources: + requests: + nvidia.com/gpu: "1" + memory: "16Gi" + cpu: "4" + limits: + nvidia.com/gpu: "1" + memory: "32Gi" + cpu: "8" + env: + - name: INFERENCE_MODEL + value: "meta-llama/Llama-2-7b-chat-hf" + - name: VLLM_GPU_MEMORY_UTILIZATION + value: "0.9" + storage: + size: "50Gi" +``` + +#### remote-vllm Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: remote-vllm-distribution + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "remote-vllm" # Supported distribution + containerSpec: + resources: + requests: + memory: "4Gi" + cpu: "2" + limits: + memory: "8Gi" + cpu: "4" + env: + - name: INFERENCE_MODEL + value: "meta-llama/Llama-2-7b-chat-hf" + - name: VLLM_URL + value: "http://external-vllm-service:8000" +``` + +### 2. Bring Your Own (BYO) Custom Images + +Use custom-built distributions with the `distribution.image` field: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: custom-vllm-distribution + namespace: default +spec: + replicas: 1 + server: + distribution: + image: "my-registry.com/custom-vllm:v1.0.0" # Custom image + containerSpec: + resources: + requests: + nvidia.com/gpu: "2" + memory: "32Gi" + cpu: "8" + limits: + nvidia.com/gpu: "2" + memory: "64Gi" + cpu: "16" + env: + - name: INFERENCE_MODEL + value: "my-custom-model" + - name: CUSTOM_VLLM_SETTING + value: "optimized" + storage: + size: "100Gi" +``` + +## Building Custom vLLM Distributions + +### Step 1: Build with LlamaStack CLI + +#### Option A: From Template + +```bash +# Install LlamaStack CLI +pip install llama-stack + +# Build from vLLM template +llama stack build --template vllm-gpu --image-type container --image-name my-vllm-dist +``` + +#### Option B: Custom Configuration + +Create `custom-vllm-build.yaml`: + +```yaml +name: custom-vllm +distribution_spec: + description: Custom vLLM distribution with optimizations + providers: + inference: inline::vllm + memory: inline::faiss + safety: inline::llama-guard + agents: inline::meta-reference + telemetry: inline::meta-reference +image_name: custom-vllm +image_type: container +``` + +Build the distribution: + +```bash +llama stack build --config custom-vllm-build.yaml +``` + +### Step 2: Enhance with Custom Dockerfile + +Create `Dockerfile.enhanced`: + +```dockerfile +FROM distribution-custom-vllm:dev + +# Install additional dependencies +RUN pip install \ + flash-attn \ + custom-optimization-lib \ + monitoring-tools + +# Add custom configurations +COPY vllm-config.json /app/config.json +COPY custom-models/ /app/models/ + +# Set optimization environment variables +ENV VLLM_USE_FLASH_ATTN=1 +ENV VLLM_OPTIMIZATION_LEVEL=high +ENV CUSTOM_GPU_SETTINGS=enabled + +# Add health check script +COPY health-check.sh /app/health-check.sh +RUN chmod +x /app/health-check.sh + +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD /app/health-check.sh + +EXPOSE 8321 +``` + +Build the enhanced image: + +```bash +docker build -f Dockerfile.enhanced -t my-registry.com/enhanced-vllm:v1.0.0 . 
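+# Note: if your build host architecture differs from your cluster nodes
+# (for example building on Apple Silicon for x86_64 GPU nodes), add
+# --platform linux/amd64 to the build command above.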
+``` + +### Step 3: Push to Registry + +```bash +# Tag for your registry +docker tag my-registry.com/enhanced-vllm:v1.0.0 my-registry.com/enhanced-vllm:latest + +# Push to registry +docker push my-registry.com/enhanced-vllm:v1.0.0 +docker push my-registry.com/enhanced-vllm:latest +``` + +### Step 4: Deploy with Operator + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: enhanced-vllm-dist + namespace: production +spec: + replicas: 2 + server: + distribution: + image: "my-registry.com/enhanced-vllm:v1.0.0" + containerSpec: + resources: + requests: + nvidia.com/gpu: "2" + memory: "32Gi" + cpu: "8" + limits: + nvidia.com/gpu: "2" + memory: "64Gi" + cpu: "16" + env: + - name: INFERENCE_MODEL + value: "meta-llama/Llama-2-13b-chat-hf" + - name: VLLM_TENSOR_PARALLEL_SIZE + value: "2" + - name: VLLM_GPU_MEMORY_UTILIZATION + value: "0.85" + - name: CUSTOM_OPTIMIZATION + value: "enabled" + storage: + size: "200Gi" + podOverrides: + volumes: + - name: model-cache + persistentVolumeClaim: + claimName: shared-model-cache + volumeMounts: + - name: model-cache + mountPath: /shared-models +``` + +## Comparison: Pre-Built vs BYO + +| Aspect | Pre-Built Distributions | BYO Custom Images | +|--------|------------------------|-------------------| +| **Setup Complexity** | Simple - just specify `name` | Complex - build and maintain images | +| **Maintenance** | Maintained by LlamaStack team | You maintain the images | +| **Customization** | Limited to environment variables | Full control over dependencies and configuration | +| **Security** | Vetted by maintainers | You control security scanning and updates | +| **Performance** | Standard optimizations | Custom optimizations possible | +| **Support** | Community and official support | Self-supported | +| **Updates** | Automatic with operator updates | Manual image rebuilds required | + +### When to Use Pre-Built Distributions + +- **Quick deployment** and standard use cases +- **Production environments** where stability is key +- **Limited customization** requirements +- **Teams without container expertise** + +### When to Use BYO Custom Images + +- **Specialized models** or inference engines +- **Custom optimizations** for specific hardware +- **Additional dependencies** not in standard images +- **Compliance requirements** for image provenance +- **Integration** with existing infrastructure + +## Monitoring and Troubleshooting + +### Health Checks + +The vLLM distribution includes built-in health checks: + +```bash +# Check pod status +kubectl get pods -l app=llama-stack + +# View logs +kubectl logs -l app=llama-stack + +# Check service endpoints +kubectl get svc my-vllm-distribution-service +``` + +### Performance Monitoring + +```bash +# Monitor GPU utilization +kubectl exec -it -- nvidia-smi + +# Check memory usage +kubectl top pods -l app=llama-stack +``` + +### Common Issues + +1. **GPU Not Available** + - Ensure NVIDIA device plugin is installed + - Verify GPU resources in node capacity + +2. **Out of Memory** + - Reduce `VLLM_GPU_MEMORY_UTILIZATION` + - Increase memory limits + - Use smaller models + +3. 
**Model Loading Failures** + - Check model path and permissions + - Verify sufficient storage space + - Check environment variable values + +## Best Practices + +### Resource Planning + +- **GPU Memory**: Ensure sufficient VRAM for model + batch processing +- **CPU**: Allocate adequate CPU for preprocessing and coordination +- **Storage**: Use fast storage (NVMe SSD) for model loading + +### Environment Variable Guidelines + +- Use `INFERENCE_MODEL` to specify the model to load +- Set `VLLM_GPU_MEMORY_UTILIZATION` to control GPU memory usage (0.8-0.9 recommended) +- Configure `VLLM_MAX_SEQ_LEN` based on your use case requirements +- Use `VLLM_TENSOR_PARALLEL_SIZE` for multi-GPU setups + +### Security + +- Use private registries for custom images +- Implement proper RBAC for distribution management +- Secure model storage with appropriate access controls + +## Examples + +### Production Setup + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: production-vllm + namespace: llama-production +spec: + replicas: 2 + server: + distribution: + name: "vllm-gpu" + containerSpec: + resources: + requests: + nvidia.com/gpu: "2" + memory: "32Gi" + cpu: "8" + limits: + nvidia.com/gpu: "2" + memory: "64Gi" + cpu: "16" + env: + - name: INFERENCE_MODEL + value: "meta-llama/Llama-2-13b-chat-hf" + - name: VLLM_TENSOR_PARALLEL_SIZE + value: "2" + - name: VLLM_GPU_MEMORY_UTILIZATION + value: "0.85" + storage: + size: "100Gi" +``` + +### Development Setup + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: dev-vllm + namespace: development +spec: + replicas: 1 + server: + distribution: + name: "vllm-gpu" + containerSpec: + resources: + requests: + nvidia.com/gpu: "1" + memory: "8Gi" + cpu: "2" + limits: + nvidia.com/gpu: "1" + memory: "16Gi" + cpu: "4" + env: + - name: INFERENCE_MODEL + value: "microsoft/DialoGPT-small" + storage: + size: "20Gi" +``` + +## API Reference + +For complete API documentation, see: +- [API Reference](../reference/api.md) +- [Configuration Reference](../reference/configuration.md) + +## Next Steps + +- [Configure Storage](../how-to/configure-storage.md) +- [Scaling Guide](../how-to/scaling.md) +- [Monitoring Setup](../how-to/monitoring.md) +- [Ollama Distribution](ollama.md) diff --git a/docs/content/examples/basic-deployment.md b/docs/content/examples/basic-deployment.md new file mode 100644 index 000000000..0ce40fd0a --- /dev/null +++ b/docs/content/examples/basic-deployment.md @@ -0,0 +1,328 @@ +# Basic Deployment Example + +This example demonstrates a simple LlamaStack deployment suitable for development and testing environments. + +## Overview + +This configuration creates a single-replica LlamaStack instance using the ollama distribution with basic resource allocation. + +## Configuration + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: basic-llamastack + namespace: default + labels: + app: llamastack + environment: development +spec: + replicas: 1 + server: + distribution: + name: "ollama" + containerSpec: + name: "llama-stack" + port: 8321 + resources: + requests: + memory: "2Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "1" + env: + - name: LOG_LEVEL + value: "info" + - name: INFERENCE_MODEL + value: "meta-llama/Llama-3.2-3B-Instruct" +``` + +## Deployment Steps + +1. **Save the configuration** to a file named `basic-deployment.yaml` + +2. **Apply the configuration**: + ```bash + kubectl apply -f basic-deployment.yaml + ``` + +3. 
**Verify the deployment**: + ```bash + kubectl get llamastackdistribution basic-llamastack + kubectl get pods -l app=llama-stack + ``` + +4. **Check the status**: + ```bash + kubectl describe llamastackdistribution basic-llamastack + ``` + +## Expected Resources + +This deployment will create: + +- **Deployment**: `basic-llamastack` with 1 replica +- **Service**: `basic-llamastack` exposing port 8321 +- **ConfigMap**: Configuration for the LlamaStack instance +- **Pod**: Single pod running the LlamaStack container + +## Accessing the Service + +### Port Forward (Development) + +```bash +kubectl port-forward service/basic-llamastack 8321:8321 +``` + +Access at: `http://localhost:8321` + +### Service Exposure (Testing) + +Create a NodePort service for external access: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: basic-llamastack-nodeport +spec: + type: NodePort + selector: + app: llama-stack + llamastack.io/instance: basic-llamastack + ports: + - port: 8321 + targetPort: 8321 + nodePort: 30321 + protocol: TCP +``` + +## Testing the Deployment + +### Health Check + +```bash +curl http://localhost:8321/health +``` + +Expected response: +```json +{ + "status": "healthy", + "version": "0.0.1", + "distribution": "meta-reference" +} +``` + +### API Endpoints + +```bash +# List providers +curl http://localhost:8321/providers + +# Get distribution info +curl http://localhost:8321/distribution/info + +# List available models +curl http://localhost:8321/models +``` + +## Resource Usage + +This basic deployment typically uses: + +- **CPU**: 0.5-1 core +- **Memory**: 2-4 GB +- **Storage**: Ephemeral (no persistent storage) +- **Network**: Single service port (8321) + +## Monitoring + +### Pod Status + +```bash +# Check pod status +kubectl get pods -l app=llama-stack + +# View pod details +kubectl describe pod -l app=llama-stack + +# Check resource usage +kubectl top pod -l app=llama-stack +``` + +### Logs + +```bash +# View recent logs +kubectl logs deployment/basic-llamastack + +# Follow logs in real-time +kubectl logs -f deployment/basic-llamastack + +# View logs with timestamps +kubectl logs deployment/basic-llamastack --timestamps +``` + +## Scaling + +### Manual Scaling + +Scale the deployment to multiple replicas: + +```bash +# Scale to 3 replicas +kubectl scale llamastackdistribution basic-llamastack --replicas=3 + +# Verify scaling +kubectl get pods -l app=llama-stack +``` + +### Resource Updates + +Update resource allocations: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: basic-llamastack +spec: + replicas: 1 + server: + distribution: + name: "meta-reference" + containerSpec: + port: 8321 + resources: + requests: + memory: "4Gi" # Increased from 2Gi + cpu: "1" # Increased from 500m + limits: + memory: "8Gi" # Increased from 4Gi + cpu: "2" # Increased from 1 +``` + +Apply the update: +```bash +kubectl apply -f basic-deployment.yaml +``` + +## Troubleshooting + +### Common Issues + +**Pod not starting:** +```bash +# Check pod events +kubectl describe pod -l app=llama-stack + +# Check resource constraints +kubectl describe node +``` + +**Service not accessible:** +```bash +# Check service endpoints +kubectl get endpoints basic-llamastack + +# Verify service configuration +kubectl describe service basic-llamastack +``` + +**Application errors:** +```bash +# Check application logs +kubectl logs deployment/basic-llamastack --tail=50 + +# Check for configuration issues +kubectl get configmap -l app=llama-stack +``` + +### Debug Commands 
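+
+If the pod is healthy but requests still fail, an in-cluster probe helps separate Service/DNS problems from application problems. The snippet below is a sketch that assumes the Service name and port from this example (`basic-llamastack` on 8321):
+
+```bash
+# Launch a throwaway curl pod and query the health endpoint through the Service
+kubectl run curl-test --rm -it --restart=Never \
+  --image=curlimages/curl --command -- \
+  curl -s http://basic-llamastack:8321/health
+```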
+ +```bash +# Get detailed resource information +kubectl get llamastackdistribution basic-llamastack -o yaml + +# Check events in the namespace +kubectl get events --sort-by=.metadata.creationTimestamp + +# Exec into the pod for debugging +kubectl exec -it deployment/basic-llamastack -- /bin/bash +``` + +## Cleanup + +Remove the deployment: + +```bash +# Delete the LlamaStack instance +kubectl delete llamastackdistribution basic-llamastack + +# Verify cleanup +kubectl get pods -l app=llama-stack +kubectl get services -l app=llama-stack +``` + +## Next Steps + +After successfully deploying this basic example: + +1. **[Try the production setup](production-setup.md)** - Learn about production-ready configurations +2. **[Add persistent storage](../how-to/configure-storage.md)** - Configure persistent volumes +3. **[Set up monitoring](../how-to/monitoring.md)** - Add observability +4. **[Configure scaling](../how-to/scaling.md)** - Learn about auto-scaling + +## Variations + +### Different Distribution + +Use the Ollama distribution instead: + +```yaml +spec: + server: + distribution: + name: "ollama" + containerSpec: + port: 8321 + env: + - name: OLLAMA_HOST + value: "0.0.0.0" +``` + +### Custom Environment Variables + +Add custom configuration: + +```yaml +spec: + server: + containerSpec: + env: + - name: LLAMASTACK_CONFIG_PATH + value: "/config/llamastack.yaml" + - name: MODEL_CACHE_DIR + value: "/tmp/models" + - name: MAX_CONCURRENT_REQUESTS + value: "10" +``` + +### Resource Constraints + +For resource-constrained environments: + +```yaml +spec: + server: + containerSpec: + resources: + requests: + memory: "1Gi" + cpu: "250m" + limits: + memory: "2Gi" + cpu: "500m" diff --git a/docs/content/examples/custom-images.md b/docs/content/examples/custom-images.md new file mode 100644 index 000000000..26f45b877 --- /dev/null +++ b/docs/content/examples/custom-images.md @@ -0,0 +1,78 @@ +# Custom Images + +Guide for building and using custom LlamaStack images with the Kubernetes operator. + +## Building Custom Images + +### Base Dockerfile + +```dockerfile +FROM llamastack/llamastack:latest + +# Add custom models +COPY models/ /models/ + +# Add custom configurations +COPY config/ /config/ + +# Install additional dependencies +RUN pip install custom-package + +# Set custom entrypoint +COPY entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] +``` + +### Multi-stage Build + +```dockerfile +# Build stage +FROM python:3.11-slim as builder + +WORKDIR /app +COPY requirements.txt . 
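+# Copying requirements.txt before the application code keeps this layer
+# cacheable: the pip install below only re-runs when dependencies change,
+# not on every source edit.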
+RUN pip install --no-cache-dir -r requirements.txt + +# Runtime stage +FROM llamastack/llamastack:latest + +COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages +COPY custom-code/ /app/ + +CMD ["python", "/app/main.py"] +``` + +## Using Custom Images + +### Basic Configuration + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: custom-llamastack +spec: + image: "myregistry.com/custom-llamastack:v1.0.0" + imagePullPolicy: Always + imagePullSecrets: + - name: registry-credentials +``` + +### With Custom Configuration + +```yaml +spec: + image: "myregistry.com/llamastack-custom:latest" + config: + models: + - name: "custom-model" + path: "/models/custom-model" + provider: "custom-provider" +``` + +## Next Steps + +- [Production Setup](production-setup.md) +- [Basic Deployment](basic-deployment.md) diff --git a/docs/content/examples/production-setup.md b/docs/content/examples/production-setup.md new file mode 100644 index 000000000..4c3399c9e --- /dev/null +++ b/docs/content/examples/production-setup.md @@ -0,0 +1,670 @@ +# Production Setup + +Complete guide for deploying LlamaStack in production environments. + +## Production Architecture + +### High-Level Overview + +```mermaid +graph TB + LB[Load Balancer] --> IG[Ingress Gateway] + IG --> SVC[LlamaStack Service] + SVC --> POD1[LlamaStack Pod 1] + SVC --> POD2[LlamaStack Pod 2] + SVC --> POD3[LlamaStack Pod 3] + + POD1 --> PV1[Persistent Volume 1] + POD2 --> PV2[Persistent Volume 2] + POD3 --> PV3[Persistent Volume 3] + + MON[Monitoring] --> POD1 + MON --> POD2 + MON --> POD3 +``` + +### Infrastructure Requirements + +- **Kubernetes**: v1.24+ +- **Nodes**: 3+ worker nodes with GPU support +- **Storage**: High-performance SSD storage +- **Network**: Low-latency networking +- **Monitoring**: Prometheus + Grafana stack + +## Production Configuration + +### Complete Production Manifest + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: llamastack-production + namespace: llamastack-prod + labels: + app: llamastack + environment: production + version: v1.0.0 +spec: + # Image configuration + image: llamastack/llamastack:v1.0.0 + imagePullPolicy: IfNotPresent + imagePullSecrets: + - name: registry-credentials + + # Scaling configuration + replicas: 3 + strategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + maxSurge: 1 + + # Resource configuration + resources: + requests: + cpu: "4" + memory: "8Gi" + nvidia.com/gpu: "1" + limits: + cpu: "8" + memory: "16Gi" + nvidia.com/gpu: "1" + + # Storage configuration + storage: + models: + size: "1Ti" + storageClass: "fast-ssd" + mountPath: "/models" + accessMode: ReadWriteOnce + data: + size: "500Gi" + storageClass: "standard-ssd" + mountPath: "/data" + accessMode: ReadWriteMany + cache: + size: "100Gi" + storageClass: "fast-ssd" + mountPath: "/cache" + accessMode: ReadWriteOnce + + # LlamaStack configuration + config: + models: + - name: "llama2-70b-chat" + path: "/models/llama2-70b-chat" + provider: "meta-reference" + config: + max_seq_len: 4096 + max_batch_size: 4 + - name: "llama2-13b-chat" + path: "/models/llama2-13b-chat" + provider: "meta-reference" + config: + max_seq_len: 4096 + max_batch_size: 8 + + inference: + provider: "meta-reference" + config: + model: "llama2-70b-chat" + max_tokens: 2048 + temperature: 0.7 + top_p: 0.9 + + safety: + provider: "llama-guard" + config: + model: "llama-guard-7b" + enable_prompt_guard: true + enable_response_guard: 
true + + memory: + provider: "faiss" + config: + vector_store: + provider: "faiss" + config: + dimension: 4096 + index_type: "IndexFlatIP" + + # Security configuration + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + seccompProfile: + type: RuntimeDefault + + containerSecurityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + + # Scheduling configuration + nodeSelector: + node-type: "gpu" + zone: "us-west-2a" + + tolerations: + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" + - key: "dedicated" + operator: "Equal" + value: "llamastack" + effect: "NoSchedule" + + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "kubernetes.io/arch" + operator: "In" + values: ["amd64"] + - key: "node-type" + operator: "In" + values: ["gpu"] + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: "In" + values: ["llamastack"] + topologyKey: "kubernetes.io/hostname" + + # Service configuration + service: + type: ClusterIP + port: 8080 + targetPort: 8080 + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + prometheus.io/path: "/metrics" + + # Ingress configuration + ingress: + enabled: true + className: "nginx" + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + cert-manager.io/cluster-issuer: "letsencrypt-prod" + hosts: + - host: "api.llamastack.example.com" + paths: + - path: "/" + pathType: "Prefix" + tls: + - secretName: "llamastack-tls" + hosts: + - "api.llamastack.example.com" + + # Health checks + healthCheck: + livenessProbe: + httpGet: + path: "/health" + port: 8080 + initialDelaySeconds: 60 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 3 + readinessProbe: + httpGet: + path: "/ready" + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + + # Monitoring + metrics: + enabled: true + port: 9090 + path: "/metrics" + serviceMonitor: + enabled: true + interval: "30s" + scrapeTimeout: "10s" + + # Environment variables + env: + - name: LLAMASTACK_LOG_LEVEL + value: "INFO" + - name: LLAMASTACK_METRICS_ENABLED + value: "true" + - name: LLAMASTACK_CACHE_ENABLED + value: "true" + - name: LLAMASTACK_MAX_WORKERS + value: "4" +``` + +## Supporting Resources + +### Namespace + +```yaml +apiVersion: v1 +kind: Namespace +metadata: + name: llamastack-prod + labels: + name: llamastack-prod + environment: production +``` + +### Storage Classes + +```yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: fast-ssd +provisioner: kubernetes.io/aws-ebs +parameters: + type: gp3 + iops: "10000" + throughput: "1000" +allowVolumeExpansion: true +reclaimPolicy: Retain +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: standard-ssd +provisioner: kubernetes.io/aws-ebs +parameters: + type: gp3 + iops: "3000" + throughput: "125" +allowVolumeExpansion: true +reclaimPolicy: Retain +``` + +### Network Policies + +```yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: llamastack-netpol + namespace: llamastack-prod +spec: + podSelector: + matchLabels: + app: llamastack + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: + matchLabels: + name: ingress-nginx + - 
podSelector: + matchLabels: + app: prometheus + ports: + - protocol: TCP + port: 8080 + - protocol: TCP + port: 9090 + egress: + - to: [] + ports: + - protocol: TCP + port: 53 + - protocol: UDP + port: 53 + - to: [] + ports: + - protocol: TCP + port: 443 + - protocol: TCP + port: 80 +``` + +## Auto Scaling + +### Horizontal Pod Autoscaler + +```yaml +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: llamastack-hpa + namespace: llamastack-prod +spec: + scaleTargetRef: + apiVersion: llamastack.io/v1alpha1 + kind: LlamaStackDistribution + name: llamastack-production + minReplicas: 3 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 + - type: Pods + pods: + metric: + name: llamastack_active_requests + target: + type: AverageValue + averageValue: "100" + behavior: + scaleUp: + stabilizationWindowSeconds: 300 + policies: + - type: Percent + value: 50 + periodSeconds: 60 + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Percent + value: 25 + periodSeconds: 60 +``` + +### Vertical Pod Autoscaler + +```yaml +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: llamastack-vpa + namespace: llamastack-prod +spec: + targetRef: + apiVersion: llamastack.io/v1alpha1 + kind: LlamaStackDistribution + name: llamastack-production + updatePolicy: + updateMode: "Auto" + resourcePolicy: + containerPolicies: + - containerName: llamastack + maxAllowed: + cpu: "16" + memory: "32Gi" + minAllowed: + cpu: "2" + memory: "4Gi" + controlledResources: ["cpu", "memory"] +``` + +## Monitoring Setup + +### ServiceMonitor + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: llamastack-monitor + namespace: llamastack-prod +spec: + selector: + matchLabels: + app: llamastack + endpoints: + - port: metrics + interval: 30s + path: /metrics + scrapeTimeout: 10s +``` + +### PrometheusRule + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: llamastack-alerts + namespace: llamastack-prod +spec: + groups: + - name: llamastack.rules + rules: + - alert: LlamaStackDown + expr: up{job="llamastack"} == 0 + for: 1m + labels: + severity: critical + annotations: + summary: "LlamaStack instance is down" + description: "LlamaStack instance {{ $labels.instance }} has been down for more than 1 minute." + + - alert: HighErrorRate + expr: rate(llamastack_requests_total{status=~"5.."}[5m]) > 0.1 + for: 5m + labels: + severity: warning + annotations: + summary: "High error rate detected" + description: "Error rate is {{ $value }} errors per second." + + - alert: HighLatency + expr: histogram_quantile(0.95, rate(llamastack_request_duration_seconds_bucket[5m])) > 5 + for: 5m + labels: + severity: warning + annotations: + summary: "High latency detected" + description: "95th percentile latency is {{ $value }} seconds." 
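+    # Optional example rule (not part of the source manifest): memory-pressure alert
+    # mirroring the HighMemoryUsage rule used in the monitoring guide; adjust the
+    # threshold to match your container limits.
+    - alert: HighMemoryUsage
+      expr: container_memory_usage_bytes / container_spec_memory_limit_bytes > 0.9
+      for: 5m
+      labels:
+        severity: warning
+      annotations:
+        summary: "High memory usage"
+        description: "Memory usage is above 90% of the container limit."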
+``` + +## Backup Strategy + +### Automated Backups + +```yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: llamastack-backup + namespace: llamastack-prod +spec: + schedule: "0 2 * * *" # Daily at 2 AM + jobTemplate: + spec: + template: + spec: + containers: + - name: backup + image: velero/velero:latest + command: + - /bin/sh + - -c + - | + velero backup create llamastack-$(date +%Y%m%d-%H%M%S) \ + --include-namespaces llamastack-prod \ + --storage-location default \ + --ttl 720h0m0s + restartPolicy: OnFailure +``` + +## Security Hardening + +### Pod Security Policy + +```yaml +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: llamastack-psp +spec: + privileged: false + allowPrivilegeEscalation: false + requiredDropCapabilities: + - ALL + volumes: + - 'configMap' + - 'emptyDir' + - 'projected' + - 'secret' + - 'downwardAPI' + - 'persistentVolumeClaim' + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + fsGroup: + rule: 'RunAsAny' +``` + +### RBAC + +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: llamastack-prod + name: llamastack-role +rules: +- apiGroups: [""] + resources: ["configmaps", "secrets"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: llamastack-rolebinding + namespace: llamastack-prod +subjects: +- kind: ServiceAccount + name: llamastack + namespace: llamastack-prod +roleRef: + kind: Role + name: llamastack-role + apiGroup: rbac.authorization.k8s.io +``` + +## Deployment Process + +### 1. Pre-deployment Checklist + +- [ ] Kubernetes cluster ready (v1.24+) +- [ ] GPU nodes available and labeled +- [ ] Storage classes configured +- [ ] Monitoring stack deployed +- [ ] Ingress controller configured +- [ ] TLS certificates ready +- [ ] Image registry accessible +- [ ] Backup solution configured + +### 2. Deployment Steps + +```bash +# 1. Create namespace +kubectl apply -f namespace.yaml + +# 2. Create storage classes +kubectl apply -f storage-classes.yaml + +# 3. Create RBAC resources +kubectl apply -f rbac.yaml + +# 4. Create network policies +kubectl apply -f network-policies.yaml + +# 5. Deploy LlamaStack +kubectl apply -f llamastack-production.yaml + +# 6. Create HPA +kubectl apply -f hpa.yaml + +# 7. Create monitoring resources +kubectl apply -f monitoring.yaml + +# 8. Verify deployment +kubectl get llamastackdistribution -n llamastack-prod +kubectl get pods -n llamastack-prod +``` + +### 3. 
Post-deployment Verification + +```bash +# Check pod status +kubectl get pods -n llamastack-prod -l app=llamastack + +# Check service endpoints +kubectl get endpoints -n llamastack-prod + +# Test health endpoints +kubectl exec -n llamastack-prod -it -- curl http://localhost:8080/health + +# Check metrics +kubectl port-forward -n llamastack-prod svc/llamastack-production 9090:9090 +curl http://localhost:9090/metrics + +# Test ingress +curl -k https://api.llamastack.example.com/health +``` + +## Maintenance + +### Rolling Updates + +```bash +# Update image version +kubectl patch llamastackdistribution llamastack-production -n llamastack-prod \ + -p '{"spec":{"image":"llamastack/llamastack:v1.1.0"}}' + +# Monitor rollout +kubectl rollout status deployment/llamastack-production -n llamastack-prod +``` + +### Scaling Operations + +```bash +# Manual scaling +kubectl scale llamastackdistribution llamastack-production -n llamastack-prod --replicas=5 + +# Check HPA status +kubectl get hpa -n llamastack-prod +``` + +### Backup and Recovery + +```bash +# Manual backup +velero backup create llamastack-manual --include-namespaces llamastack-prod + +# List backups +velero backup get + +# Restore from backup +velero restore create --from-backup llamastack-20240101-120000 +``` + +## Next Steps + +- [Custom Images Guide](custom-images.md) +- [Monitoring Setup](../how-to/monitoring.md) +- [Scaling Guide](../how-to/scaling.md) +- [Troubleshooting](../how-to/troubleshooting.md) diff --git a/docs/content/getting-started/configuration.md b/docs/content/getting-started/configuration.md new file mode 100644 index 000000000..ee96f1aa0 --- /dev/null +++ b/docs/content/getting-started/configuration.md @@ -0,0 +1,69 @@ +# Configuration + +This guide covers how to configure the LlamaStack Kubernetes Operator for your environment. + +## Basic Configuration + +The operator can be configured through various methods: + +### Environment Variables + +Key environment variables for the operator: + +```bash +# Operator configuration +OPERATOR_NAMESPACE=llamastack-system +LOG_LEVEL=info +METRICS_ADDR=:8080 +``` + +### ConfigMaps + +The operator uses ConfigMaps for distribution configurations: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: llamastack-config + namespace: llamastack-system +data: + config.yaml: | + distributions: + - name: default + image: llamastack/llamastack:latest +``` + +## Advanced Configuration + +### Resource Limits + +Configure resource limits for LlamaStack distributions: + +```yaml +spec: + resources: + limits: + cpu: "2" + memory: "4Gi" + requests: + cpu: "1" + memory: "2Gi" +``` + +### Storage Configuration + +Configure persistent storage for your distributions: + +```yaml +spec: + storage: + size: "10Gi" + storageClass: "fast-ssd" +``` + +## Next Steps + +- [Quick Start Guide](quick-start.md) +- [API Reference](../reference/api.md) +- [Troubleshooting](../how-to/troubleshooting.md) diff --git a/docs/content/getting-started/distributions.md b/docs/content/getting-started/distributions.md new file mode 100644 index 000000000..bcb461285 --- /dev/null +++ b/docs/content/getting-started/distributions.md @@ -0,0 +1,357 @@ +# Understanding LlamaStack Distributions + +This guide explains the different ways to deploy LlamaStack using the Kubernetes operator, focusing on the distinction between **Supported Distributions** and **Bring-Your-Own (BYO) Distributions**. 
+ +## Distribution Types Overview + +The LlamaStack Kubernetes Operator supports two main approaches for deploying LlamaStack: + +### 🎯 **Supported Distributions** (Recommended) +Pre-configured, tested distributions maintained by the LlamaStack team with specific provider integrations. + +### πŸ› οΈ **Bring-Your-Own (BYO) Distributions** +Custom container images that you build and maintain yourself. + +## Supported Distributions + +### What are Supported Distributions? + +Supported distributions are **pre-built, tested container images** that include: +- βœ… **Specific provider integrations** (Ollama, vLLM, NVIDIA, etc.) +- βœ… **Optimized configurations** for each provider +- βœ… **Tested compatibility** with the operator +- βœ… **Regular updates** and security patches +- βœ… **Documentation and examples** + +### Available Pre-Built Distributions + +The operator currently supports **7 pre-built distributions** that are actively maintained and tested: + +| Distribution | Image | Use Case | Requirements | +|--------------|-------|----------|--------------| +| `starter` | `docker.io/llamastack/distribution-starter:latest` | **Recommended default** - General purpose LlamaStack | Basic Kubernetes resources | +| `ollama` | `docker.io/llamastack/distribution-ollama:latest` | Local inference with Ollama integration | Ollama server | +| `bedrock` | `docker.io/llamastack/distribution-bedrock:latest` | AWS Bedrock models | AWS credentials | +| `remote-vllm` | `docker.io/llamastack/distribution-remote-vllm:latest` | Remote vLLM server integration | External vLLM server | +| `tgi` | `docker.io/llamastack/distribution-tgi:latest` | Hugging Face Text Generation Inference | TGI server setup | +| `together` | `docker.io/llamastack/distribution-together:latest` | Together AI API integration | Together API key | +| `vllm-gpu` | `docker.io/llamastack/distribution-vllm-gpu:latest` | High-performance GPU inference with vLLM | GPU infrastructure | + +!!! note "Distribution Selection" + - **New users**: Start with `starter` distribution + - **Ollama users**: Use `ollama` distribution + - **GPU inference**: Use `vllm-gpu` distribution + - **Cloud APIs**: Use `bedrock` or `together` distributions + +### Using Supported Distributions + +#### Basic Syntax + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-distribution +spec: + server: + distribution: + name: "distribution-name" # Use distribution name + # ... 
other configuration +``` + +#### Example: Starter Distribution (Recommended) + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-llamastack +spec: + replicas: 1 + server: + distribution: + name: "starter" + containerSpec: + port: 8321 + resources: + requests: + cpu: "1" + memory: "2Gi" + limits: + cpu: "2" + memory: "4Gi" + storage: + size: "20Gi" +``` + +#### Example: Ollama Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: ollama-llamastack +spec: + replicas: 1 + server: + distribution: + name: "ollama" + containerSpec: + port: 8321 + resources: + requests: + cpu: "1" + memory: "2Gi" + limits: + cpu: "2" + memory: "4Gi" + env: + - name: OLLAMA_URL + value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434" + storage: + size: "20Gi" +``` + +#### Example: vLLM GPU Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: vllm-gpu-llamastack +spec: + replicas: 1 + server: + distribution: + name: "vllm-gpu" + containerSpec: + port: 8321 + resources: + requests: + cpu: "2" + memory: "8Gi" + nvidia.com/gpu: "1" + limits: + cpu: "4" + memory: "16Gi" + nvidia.com/gpu: "1" + env: + - name: MODEL_NAME + value: "meta-llama/Llama-2-7b-chat-hf" + - name: TENSOR_PARALLEL_SIZE + value: "1" + storage: + size: "50Gi" +``` + +### Benefits of Supported Distributions + +- **πŸš€ Quick Setup**: No need to build custom images +- **πŸ”’ Security**: Regular security updates from LlamaStack team +- **πŸ“š Documentation**: Comprehensive guides and examples +- **πŸ§ͺ Tested**: Thoroughly tested with the operator +- **πŸ”§ Optimized**: Pre-configured for optimal performance +- **πŸ†˜ Support**: Community and official support available + +## Bring-Your-Own (BYO) Distributions + +### What are BYO Distributions? + +BYO distributions allow you to use **custom container images** that you build and maintain: +- πŸ› οΈ **Custom integrations** not available in supported distributions +- 🎨 **Specialized configurations** for your use case +- πŸ”§ **Custom dependencies** and libraries +- πŸ“¦ **Private or proprietary** model integrations + +### Using BYO Distributions + +#### Basic Syntax + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-custom-distribution +spec: + server: + distribution: + image: "your-registry.com/custom-llamastack:tag" # Use custom image + # ... 
other configuration +``` + +#### Example: Custom Image + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: custom-llamastack +spec: + replicas: 1 + server: + distribution: + image: "myregistry.com/custom-llamastack:v1.0.0" + containerSpec: + port: 8321 + resources: + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "4" + memory: "8Gi" + env: + - name: CUSTOM_CONFIG_PATH + value: "/app/config/custom.yaml" + - name: API_KEY + valueFrom: + secretKeyRef: + name: custom-credentials + key: api-key + storage: + size: "100Gi" + podOverrides: + volumes: + - name: custom-config + configMap: + name: custom-llamastack-config + volumeMounts: + - name: custom-config + mountPath: "/app/config" + readOnly: true +``` + +### Building Custom Images + +#### Example Dockerfile + +```dockerfile +# Start from a supported distribution or base image +FROM llamastack/llamastack:latest + +# Add your custom dependencies +RUN pip install custom-package-1 custom-package-2 + +# Copy custom configuration +COPY custom-config/ /app/config/ + +# Copy custom code +COPY src/ /app/src/ + +# Set custom environment variables +ENV CUSTOM_SETTING=value + +# Override entrypoint if needed +COPY entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh +ENTRYPOINT ["/entrypoint.sh"] +``` + +#### Building and Pushing + +```bash +# Build your custom image +docker build -t myregistry.com/custom-llamastack:v1.0.0 . + +# Push to your registry +docker push myregistry.com/custom-llamastack:v1.0.0 +``` + +### BYO Distribution Considerations + +#### Advantages +- **🎯 Full Control**: Complete customization of the stack +- **πŸ”§ Custom Integrations**: Add proprietary or specialized providers +- **πŸ“¦ Private Models**: Include private or fine-tuned models +- **⚑ Optimizations**: Custom performance optimizations + +#### Responsibilities +- **πŸ”’ Security**: You maintain security updates +- **πŸ§ͺ Testing**: You test compatibility with the operator +- **πŸ“š Documentation**: You document your custom setup +- **πŸ†˜ Support**: Limited community support for custom images +- **πŸ”„ Updates**: You manage updates and compatibility + +## Key Differences Summary + +| Aspect | Supported Distributions | BYO Distributions | +|--------|------------------------|-------------------| +| **Setup Complexity** | βœ… Simple (just specify name) | πŸ”§ Complex (build & maintain image) | +| **Maintenance** | βœ… Handled by LlamaStack team | ❌ Your responsibility | +| **Security Updates** | βœ… Automatic | ❌ Manual | +| **Documentation** | βœ… Comprehensive | ❌ You create | +| **Support** | βœ… Community + Official | ⚠️ Limited | +| **Customization** | ⚠️ Limited to configuration | βœ… Full control | +| **Testing** | βœ… Pre-tested | ❌ You test | +| **Time to Deploy** | βœ… Minutes | ⏱️ Hours/Days | + +## Choosing the Right Approach + +### Use Supported Distributions When: +- βœ… Your use case matches available providers (Ollama, vLLM, etc.) 
+- βœ… You want quick setup and deployment +- βœ… You prefer maintained and tested solutions +- βœ… You need community support +- βœ… Security and updates are important + +### Use BYO Distributions When: +- πŸ› οΈ You need custom provider integrations +- πŸ”§ You have specialized requirements +- πŸ“¦ You use proprietary or private models +- ⚑ You need specific performance optimizations +- 🎯 You have the expertise to maintain custom images + +## Migration Between Approaches + +### From Supported to BYO +```yaml +# Before (supported) +spec: + server: + distribution: + name: "ollama" + +# After (BYO) +spec: + server: + distribution: + image: "myregistry.com/custom-ollama:v1.0.0" +``` + +### From BYO to Supported +```yaml +# Before (BYO) +spec: + server: + distribution: + image: "myregistry.com/custom-vllm:v1.0.0" + +# After (supported) +spec: + server: + distribution: + name: "vllm-gpu" +``` + +## Best Practices + +### For Supported Distributions +1. **Start Simple**: Begin with basic configuration +2. **Use Environment Variables**: Configure via `env` section +3. **Monitor Resources**: Set appropriate resource limits +4. **Check Documentation**: Review provider-specific guides + +### For BYO Distributions +1. **Base on Supported Images**: Start from `llamastack/llamastack:latest` +2. **Document Everything**: Maintain clear documentation +3. **Test Thoroughly**: Test with the operator before production +4. **Version Control**: Tag and version your custom images +5. **Security Scanning**: Regularly scan for vulnerabilities + +## Next Steps + +- [Configuration Reference](../reference/configuration.md) - Detailed configuration options +- [Basic Deployment](../examples/basic-deployment.md) - Simple deployment examples +- [Production Setup](../examples/production-setup.md) - Production-ready configurations +- [Custom Images Guide](../examples/custom-images.md) - Building custom images +- [Troubleshooting](../how-to/troubleshooting.md) - Common issues and solutions diff --git a/docs/content/getting-started/installation.md b/docs/content/getting-started/installation.md new file mode 100644 index 000000000..e9bacca1a --- /dev/null +++ b/docs/content/getting-started/installation.md @@ -0,0 +1,273 @@ +# Installation Guide + +This guide walks you through installing the LlamaStack Kubernetes Operator in your cluster. 
+ +## Prerequisites + +Before installing the operator, ensure you have: + +- **Kubernetes cluster** (version 1.25 or later) +- **kubectl** configured to access your cluster +- **Cluster admin permissions** to install CRDs and RBAC resources +- **Container runtime** that supports pulling images from public registries + +## Installation Methods + +### Method 1: Kustomize (Recommended) + +The recommended way to install the operator is using Kustomize: + +```bash +# Clone the repository +git clone https://github.com/llamastack/llama-stack-k8s-operator.git +cd llama-stack-k8s-operator + +# Install using Kustomize +kubectl apply -k config/default +``` + +This will: +- Install the Custom Resource Definitions (CRDs) +- Create the necessary RBAC resources +- Deploy the operator in the `llama-stack-k8s-operator-system` namespace + +### Method 2: Build from Source + +For development or customized builds: + +```bash +# Clone the repository +git clone https://github.com/llamastack/llama-stack-k8s-operator.git +cd llama-stack-k8s-operator + +# Build and deploy +make docker-build docker-push IMG=/llama-stack-k8s-operator:tag +make deploy IMG=/llama-stack-k8s-operator:tag +``` + +## Verification + +After installation, verify that the operator is running: + +```bash +# Check operator deployment +kubectl get deployment -n llama-stack-k8s-operator-system llama-stack-k8s-operator-controller-manager + +# Check operator logs +kubectl logs -n llama-stack-k8s-operator-system deployment/llama-stack-k8s-operator-controller-manager + +# Verify CRDs are installed +kubectl get crd llamastackdistributions.llamastack.io +``` + +Expected output: +``` +NAME CREATED AT +llamastackdistributions.llamastack.io 2024-01-15T10:30:00Z +``` + +## Configuration + +### Resource Requirements + +The operator has minimal resource requirements: + +```yaml +resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 10m + memory: 64Mi +``` + +### Environment Variables + +Configure the operator behavior using environment variables: + +| Variable | Description | Default | +|----------|-------------|---------| +| `METRICS_BIND_ADDRESS` | Metrics server bind address | `:8080` | +| `HEALTH_PROBE_BIND_ADDRESS` | Health probe bind address | `:8081` | +| `LEADER_ELECT` | Enable leader election | `false` | +| `LOG_LEVEL` | Logging level | `info` | + +### Custom Configuration + +For custom configurations, create a `kustomization.yaml`: + +```yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- https://github.com/llamastack/llama-stack-k8s-operator/config/default + +patchesStrategicMerge: +- manager_config_patch.yaml + +images: +- name: quay.io/llamastack/llama-stack-k8s-operator + newTag: v0.1.0 +``` + +## Namespace Configuration + +### Default Namespace + +By default, the operator watches all namespaces. 
To restrict to specific namespaces: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llamastack-operator-controller-manager +spec: + template: + spec: + containers: + - name: manager + env: + - name: WATCH_NAMESPACE + value: "llamastack-system,production" +``` + +### Multi-tenant Setup + +For multi-tenant environments, install the operator with namespace restrictions: + +```bash +# Install operator in tenant namespace +kubectl create namespace tenant-a +kubectl apply -f operator.yaml -n tenant-a + +# Configure RBAC for tenant isolation +kubectl apply -f tenant-rbac.yaml +``` + +## Security Configuration + +### RBAC + +The operator requires the following permissions: + +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: llamastack-operator-manager-role +rules: +- apiGroups: ["llamastack.io"] + resources: ["llamastackdistributions"] + verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] +- apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] +- apiGroups: [""] + resources: ["services", "configmaps", "persistentvolumeclaims"] + verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] +``` + +### Network Policies + +Secure your deployment with network policies: + +```yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: llamastack-operator-netpol + namespace: llama-stack-k8s-operator-system +spec: + podSelector: + matchLabels: + control-plane: controller-manager + policyTypes: + - Ingress + - Egress + ingress: + - from: + - namespaceSelector: {} + ports: + - protocol: TCP + port: 8080 + - protocol: TCP + port: 8081 +``` + +## Troubleshooting + +### Common Issues + +**1. CRD Installation Failed** +```bash +# Check if CRDs exist +kubectl get crd | grep llamastack + +# Manually install CRDs +kubectl apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/main/config/crd/bases/llamastack.io_llamastackdistributions.yaml +``` + +**2. Operator Pod Not Starting** +```bash +# Check pod status +kubectl get pods -n llama-stack-k8s-operator-system + +# Check events +kubectl describe pod -n llama-stack-k8s-operator-system + +# Check logs +kubectl logs -n llama-stack-k8s-operator-system +``` + +**3. Permission Denied Errors** +```bash +# Check RBAC configuration +kubectl auth can-i create llamastackdistributions --as=system:serviceaccount:llama-stack-k8s-operator-system:llama-stack-k8s-operator-controller-manager + +# Verify service account +kubectl get serviceaccount -n llama-stack-k8s-operator-system +``` + +### Debug Mode + +Enable debug logging for troubleshooting: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llamastack-operator-controller-manager +spec: + template: + spec: + containers: + - name: manager + env: + - name: LOG_LEVEL + value: "debug" +``` + +## Uninstallation + +To remove the operator: + +```bash +# Delete operator deployment +kubectl delete -f https://github.com/llamastack/llama-stack-k8s-operator/releases/latest/download/operator.yaml + +# Clean up CRDs (this will delete all LlamaStackDistribution resources) +kubectl delete crd llamastackdistributions.llamastack.io +``` + +!!! warning "Data Loss Warning" + Deleting the CRD will remove all LlamaStackDistribution resources and their associated data. Make sure to backup any important configurations before uninstalling. + +## Next Steps + +After successful installation: + +1. 
[Deploy your first LlamaStack instance](quick-start.md) +2. [Learn about configuration options](configuration.md) +3. [Explore examples](../examples/basic-deployment.md) diff --git a/docs/content/getting-started/quick-start.md b/docs/content/getting-started/quick-start.md new file mode 100644 index 000000000..a17733b84 --- /dev/null +++ b/docs/content/getting-started/quick-start.md @@ -0,0 +1,407 @@ +# Quick Start Guide + +This guide will help you deploy your first LlamaStack instance using the Kubernetes operator in just a few minutes. + +## Prerequisites + +- LlamaStack Operator installed ([Installation Guide](installation.md)) +- kubectl configured and connected to your cluster +- At least 4GB of available memory in your cluster + +## Step 1: Create a Basic LlamaStack Instance + +Create a file named `basic-llamastack.yaml`: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-first-llamastack + namespace: default +spec: + replicas: 1 + server: + distribution: + name: "starter" + containerSpec: + port: 8321 + resources: + requests: + memory: "2Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "1" +``` + +## Step 2: Deploy the Instance + +Apply the configuration to your cluster: + +```bash +kubectl apply -f basic-llamastack.yaml +``` + +## Step 3: Monitor the Deployment + +Watch the deployment progress: + +```bash +# Check the LlamaStackDistribution status +kubectl get llamastackdistribution my-first-llamastack + +# Watch the pods being created +kubectl get pods -l app=llama-stack -w + +# Check deployment status +kubectl get deployment my-first-llamastack +``` + +Expected output: +``` +NAME READY STATUS RESTARTS AGE +my-first-llamastack 1/1 Running 0 2m +``` + +## Step 4: Access Your LlamaStack Instance + +### Port Forward (Development) + +For development and testing, use port forwarding: + +```bash +kubectl port-forward service/my-first-llamastack 8321:8321 +``` + +Now you can access LlamaStack at `http://localhost:8321`. + +### Service Exposure (Production) + +For production access, expose the service: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: my-first-llamastack-external +spec: + type: LoadBalancer + selector: + app: llama-stack + llamastack.io/instance: my-first-llamastack + ports: + - port: 80 + targetPort: 8321 + protocol: TCP +``` + +Apply the service: +```bash +kubectl apply -f service.yaml +``` + +## Step 5: Test the API + +Test that your LlamaStack instance is working: + +```bash +# Health check +curl http://localhost:8321/health + +# List available providers +curl http://localhost:8321/providers + +# Get distribution info +curl http://localhost:8321/distribution/info +``` + +Expected response for health check: +```json +{ + "status": "healthy", + "version": "0.0.1", + "distribution": "meta-reference" +} +``` + +## Step 6: Explore the API + +LlamaStack provides a comprehensive API for AI applications. 
Here are some key endpoints: + +### Models API +```bash +# List available models +curl http://localhost:8321/models + +# Get model info +curl http://localhost:8321/models/{model_id} +``` + +### Inference API +```bash +# Text completion +curl -X POST http://localhost:8321/inference/completion \ + -H "Content-Type: application/json" \ + -d '{ + "model": "meta-llama/Llama-2-7b-chat-hf", + "prompt": "Hello, how are you?", + "max_tokens": 100 + }' +``` + +### Memory API +```bash +# Create memory bank +curl -X POST http://localhost:8321/memory/create \ + -H "Content-Type: application/json" \ + -d '{ + "bank_id": "my-memory", + "config": { + "type": "vector", + "embedding_model": "all-MiniLM-L6-v2" + } + }' +``` + +## Configuration Examples + +### Custom Distribution + +Use a different LlamaStack distribution: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: ollama-llamastack +spec: + replicas: 1 + server: + distribution: + name: "ollama" + containerSpec: + port: 8321 + env: + - name: OLLAMA_HOST + value: "0.0.0.0" +``` + +### Custom Container Image + +Use your own LlamaStack image: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: custom-llamastack +spec: + replicas: 1 + server: + distribution: + image: "my-registry.com/llamastack:custom" + containerSpec: + port: 8321 +``` + +### With Persistent Storage + +Add persistent storage for models and data: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: persistent-llamastack +spec: + replicas: 1 + server: + distribution: + name: "meta-reference" + containerSpec: + port: 8321 + storage: + size: "50Gi" + mountPath: "/.llama" +``` + +### High Availability Setup + +Deploy multiple replicas with load balancing: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: ha-llamastack +spec: + replicas: 3 + server: + distribution: + name: "meta-reference" + containerSpec: + port: 8321 + resources: + requests: + memory: "4Gi" + cpu: "1" + limits: + memory: "8Gi" + cpu: "2" + storage: + size: "100Gi" + mountPath: "/.llama" +``` + +## Monitoring and Observability + +### Check Resource Usage + +Monitor resource consumption: + +```bash +# Pod resource usage +kubectl top pods -l app=llama-stack + +# Node resource usage +kubectl top nodes +``` + +### View Logs + +Access application logs: + +```bash +# View recent logs +kubectl logs deployment/my-first-llamastack + +# Follow logs in real-time +kubectl logs -f deployment/my-first-llamastack + +# View logs from all replicas +kubectl logs -l app=llama-stack --tail=100 +``` + +### Health Checks + +The operator automatically configures health checks: + +```yaml +# Readiness probe +readinessProbe: + httpGet: + path: /health + port: 8321 + initialDelaySeconds: 30 + periodSeconds: 10 + +# Liveness probe +livenessProbe: + httpGet: + path: /health + port: 8321 + initialDelaySeconds: 60 + periodSeconds: 30 +``` + +## Scaling + +### Manual Scaling + +Scale your deployment manually: + +```bash +# Scale to 3 replicas +kubectl patch llamastackdistribution my-first-llamastack -p '{"spec":{"replicas":3}}' + +# Verify scaling +kubectl get pods -l app=llama-stack +``` + +### Resource Updates + +Update resource allocations: + +```bash +kubectl patch llamastackdistribution my-first-llamastack -p '{ + "spec": { + "server": { + "containerSpec": { + "resources": { + "requests": { + "memory": "4Gi", + "cpu": "1" + }, + "limits": { + "memory": "8Gi", + "cpu": "2" + } 
+ } + } + } + } +}' +``` + +## Cleanup + +When you're done experimenting, clean up the resources: + +```bash +# Delete the LlamaStack instance +kubectl delete llamastackdistribution my-first-llamastack + +# Delete any additional services +kubectl delete service my-first-llamastack-external + +# Verify cleanup +kubectl get pods -l app=llama-stack +``` + +## Troubleshooting + +### Common Issues + +**Pod not starting:** +```bash +# Check pod events +kubectl describe pod + +# Check resource constraints +kubectl describe node +``` + +**Service not accessible:** +```bash +# Check service endpoints +kubectl get endpoints my-first-llamastack + +# Verify port forwarding +kubectl port-forward service/my-first-llamastack 8321:8321 --address 0.0.0.0 +``` + +**API errors:** +```bash +# Check application logs +kubectl logs deployment/my-first-llamastack + +# Verify configuration +kubectl get llamastackdistribution my-first-llamastack -o yaml +``` + +## Next Steps + +Now that you have a working LlamaStack instance: + +1. **[Learn about configuration options](configuration.md)** - Explore advanced configuration +2. **[Check out examples](../examples/basic-deployment.md)** - See real-world use cases +3. **[Read the API reference](../reference/api.md)** - Understand all available options +4. **[Set up monitoring](../how-to/monitoring.md)** - Add observability to your deployment + +## Getting Help + +If you encounter issues: + +- Check the [troubleshooting guide](../how-to/troubleshooting.md) +- Review [GitHub issues](https://github.com/llamastack/llama-stack-k8s-operator/issues) +- Join the [community discussions](https://github.com/llamastack/llama-stack-k8s-operator/discussions) diff --git a/docs/content/how-to/configure-storage.md b/docs/content/how-to/configure-storage.md new file mode 100644 index 000000000..216ce5fc2 --- /dev/null +++ b/docs/content/how-to/configure-storage.md @@ -0,0 +1,265 @@ +# Configure Storage + +Learn how to configure persistent storage for your LlamaStack distributions. 
+
+## Storage Overview
+
+LlamaStack distributions can use persistent storage for:
+
+- Model files and weights
+- Configuration data
+- Logs and metrics
+- User data and sessions
+
+## Basic Storage Configuration
+
+### Default Storage
+
+By default, LlamaStack uses ephemeral storage:
+
+```yaml
+apiVersion: llamastack.io/v1alpha1
+kind: LlamaStackDistribution
+metadata:
+  name: basic-llamastack
+spec:
+  image: llamastack/llamastack:latest
+  # No storage configuration = ephemeral storage
+```
+
+### Persistent Storage
+
+Enable persistent storage:
+
+```yaml
+apiVersion: llamastack.io/v1alpha1
+kind: LlamaStackDistribution
+metadata:
+  name: persistent-llamastack
+spec:
+  image: llamastack/llamastack:latest
+  storage:
+    size: "50Gi"
+    storageClass: "standard"
+    accessMode: "ReadWriteOnce"
+```
+
+## Storage Classes
+
+### Available Storage Classes
+
+Common storage classes and their use cases:
+
+| Storage Class | Performance | Use Case |
+|---------------|-------------|----------|
+| `standard` | Standard | General purpose |
+| `fast-ssd` | High | Model inference |
+| `slow-hdd` | Low | Archival storage |
+
+### Custom Storage Class
+
+Create a custom storage class for LlamaStack:
+
+```yaml
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: llamastack-storage
+provisioner: kubernetes.io/aws-ebs
+parameters:
+  type: gp3
+  iops: "3000"
+  throughput: "125"
+allowVolumeExpansion: true
+```
+
+## Advanced Storage Configurations
+
+### Multiple Volumes
+
+Configure separate volumes for different purposes:
+
+```yaml
+apiVersion: llamastack.io/v1alpha1
+kind: LlamaStackDistribution
+metadata:
+  name: multi-volume-llamastack
+spec:
+  image: llamastack/llamastack:latest
+  storage:
+    models:
+      size: "100Gi"
+      storageClass: "fast-ssd"
+      mountPath: "/models"
+    data:
+      size: "50Gi"
+      storageClass: "standard"
+      mountPath: "/data"
+    logs:
+      size: "10Gi"
+      storageClass: "standard"
+      mountPath: "/logs"
+```
+
+### Shared Storage
+
+Configure shared storage across replicas:
+
+```yaml
+apiVersion: llamastack.io/v1alpha1
+kind: LlamaStackDistribution
+metadata:
+  name: shared-storage-llamastack
+spec:
+  image: llamastack/llamastack:latest
+  replicas: 3
+  storage:
+    size: "200Gi"
+    storageClass: "nfs"
+    accessMode: "ReadWriteMany" # Allows multiple pods to mount
+```
+
+## Storage Optimization
+
+### Performance Tuning
+
+Optimize storage for model inference:
+
+```yaml
+spec:
+  storage:
+    size: "500Gi"
+    storageClass: "nvme-ssd"
+    iops: 10000
+    throughput: "1000MB/s"
+```
+
+### Cost Optimization
+
+Use tiered storage for cost efficiency:
+
+```yaml
+spec:
+  storage:
+    hot:
+      size: "50Gi"
+      storageClass: "fast-ssd"
+      mountPath: "/models/active"
+    warm:
+      size: "200Gi"
+      storageClass: "standard"
+      mountPath: "/models/cache"
+    cold:
+      size: "1Ti"
+      storageClass: "slow-hdd"
+      mountPath: "/models/archive"
+```
+
+## Backup and Recovery
+
+### Automated Backups
+
+Configure automated backups:
+
+```yaml
+spec:
+  backup:
+    enabled: true
+    schedule: "0 2 * * *" # Daily at 2 AM
+    retention: "30d"
+    destination: "s3://my-backup-bucket"
+```
+
+### Manual Backup
+
+Create manual backups. The snapshot and restore manifests below are a minimal sketch that assumes the CSI snapshot controller (`snapshot.storage.k8s.io` API) is installed; adjust names and sizes for your environment:
+
+```bash
+# Create a snapshot of the LlamaStack PVC
+kubectl apply -f - <<EOF
+apiVersion: snapshot.storage.k8s.io/v1
+kind: VolumeSnapshot
+metadata:
+  name: llamastack-backup
+spec:
+  source:
+    persistentVolumeClaimName: llamastack-storage
+EOF
+
+# Restore from snapshot by creating a new PVC from it
+kubectl apply -f - <<EOF
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: llamastack-storage-restored
+spec:
+  dataSource:
+    name: llamastack-backup
+    kind: VolumeSnapshot
+    apiGroup: snapshot.storage.k8s.io
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 50Gi
+EOF
+```
+
+## Monitoring Storage
+
+### Usage Monitoring
+
+```bash
+# Check disk usage inside the pod
+kubectl exec -it <pod-name> -- df -h
+
+# Check I/O metrics
+kubectl top pods --containers
+```
+
+### Alerts
+
+Set up storage alerts:
+
+```yaml
+# Prometheus alert for high storage usage
+- alert: HighStorageUsage
+  expr: kubelet_volume_stats_used_bytes /
kubelet_volume_stats_capacity_bytes > 0.8 + for: 5m + labels: + severity: warning + annotations: + summary: "High storage usage on {{ $labels.persistentvolumeclaim }}" +``` + +## Troubleshooting + +### Common Issues + +**PVC Stuck in Pending:** +```bash +# Check storage class +kubectl get storageclass + +# Check events +kubectl describe pvc +``` + +**Out of Space:** +```bash +# Expand volume (if supported) +kubectl patch pvc -p '{"spec":{"resources":{"requests":{"storage":"100Gi"}}}}' +``` + +**Performance Issues:** +```bash +# Check I/O wait +kubectl exec -it -- iostat -x 1 + +# Check storage class parameters +kubectl describe storageclass +``` + +## Next Steps + +- [Scaling Guide](scaling.md) +- [Monitoring Setup](monitoring.md) +- [Troubleshooting](troubleshooting.md) diff --git a/docs/content/how-to/deploy-llamastack.md b/docs/content/how-to/deploy-llamastack.md new file mode 100644 index 000000000..50ef66dde --- /dev/null +++ b/docs/content/how-to/deploy-llamastack.md @@ -0,0 +1,133 @@ +# Deploy LlamaStack + +This guide walks you through deploying a LlamaStack distribution using the Kubernetes operator. + +## Prerequisites + +- Kubernetes cluster (v1.20+) +- LlamaStack Kubernetes Operator installed +- `kubectl` configured to access your cluster + +## Basic Deployment + +### 1. Create a LlamaStackDistribution + +Create a basic LlamaStack distribution: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: my-llamastack + namespace: default +spec: + image: llamastack/llamastack:latest + replicas: 1 + resources: + requests: + cpu: "1" + memory: "2Gi" + limits: + cpu: "2" + memory: "4Gi" +``` + +### 2. Apply the Configuration + +```bash +kubectl apply -f llamastack-distribution.yaml +``` + +### 3. Verify Deployment + +Check the status of your deployment: + +```bash +# Check the distribution +kubectl get llamastackdistribution my-llamastack + +# Check the pods +kubectl get pods -l app=my-llamastack + +# Check logs +kubectl logs -l app=my-llamastack +``` + +## Advanced Deployment Options + +### With Persistent Storage + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: llamastack-with-storage +spec: + image: llamastack/llamastack:latest + storage: + size: "20Gi" + storageClass: "fast-ssd" + persistence: + enabled: true +``` + +### With Custom Configuration + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: llamastack-custom +spec: + image: llamastack/llamastack:latest + config: + models: + - name: "llama2-7b" + path: "/models/llama2-7b" + inference: + provider: "meta-reference" +``` + +## Scaling + +### Horizontal Scaling + +Scale your deployment by adjusting replicas: + +```bash +kubectl patch llamastackdistribution my-llamastack -p '{"spec":{"replicas":3}}' +``` + +### Vertical Scaling + +Update resource limits: + +```yaml +spec: + resources: + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "4" + memory: "8Gi" +``` + +## Monitoring + +Monitor your deployment: + +```bash +# Check resource usage +kubectl top pods -l app=my-llamastack + +# Check events +kubectl get events --field-selector involvedObject.name=my-llamastack +``` + +## Next Steps + +- [Configure Storage](configure-storage.md) +- [Set up Monitoring](monitoring.md) +- [Scaling Guide](scaling.md) +- [Troubleshooting](troubleshooting.md) diff --git a/docs/content/how-to/monitoring.md b/docs/content/how-to/monitoring.md new file mode 100644 index 000000000..64985cd35 --- /dev/null +++ 
b/docs/content/how-to/monitoring.md @@ -0,0 +1,407 @@ +# Monitoring + +Set up comprehensive monitoring for your LlamaStack distributions. + +## Monitoring Overview + +Monitor your LlamaStack deployments with: + +- **Metrics**: Performance and resource usage +- **Logs**: Application and system logs +- **Alerts**: Proactive issue detection +- **Dashboards**: Visual monitoring + +## Metrics Collection + +### Prometheus Setup + +Deploy Prometheus for metrics collection: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-config +data: + prometheus.yml: | + global: + scrape_interval: 15s + scrape_configs: + - job_name: 'llamastack' + kubernetes_sd_configs: + - role: pod + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app] + action: keep + regex: llamastack +``` + +### ServiceMonitor + +Create a ServiceMonitor for automatic discovery: + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: llamastack-monitor +spec: + selector: + matchLabels: + app: llamastack + endpoints: + - port: metrics + interval: 30s + path: /metrics +``` + +## Key Metrics + +### Application Metrics + +Monitor LlamaStack-specific metrics: + +```yaml +# Custom metrics exposed by LlamaStack +llamastack_requests_total +llamastack_request_duration_seconds +llamastack_active_connections +llamastack_model_load_time_seconds +llamastack_inference_latency_seconds +``` + +### Resource Metrics + +Track resource usage: + +```yaml +# CPU and Memory +container_cpu_usage_seconds_total +container_memory_usage_bytes +container_memory_working_set_bytes + +# Network +container_network_receive_bytes_total +container_network_transmit_bytes_total + +# Storage +kubelet_volume_stats_used_bytes +kubelet_volume_stats_capacity_bytes +``` + +## Logging + +### Centralized Logging + +Set up log aggregation with Fluentd: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: fluentd-config +data: + fluent.conf: | + + @type tail + path /var/log/containers/*llamastack*.log + pos_file /var/log/fluentd-containers.log.pos + tag kubernetes.* + format json + + + + @type elasticsearch + host elasticsearch.logging.svc.cluster.local + port 9200 + index_name llamastack-logs + +``` + +### Log Levels + +Configure appropriate log levels: + +```yaml +spec: + env: + - name: LOG_LEVEL + value: "info" # debug, info, warn, error + - name: LOG_FORMAT + value: "json" # json, text +``` + +## Dashboards + +### Grafana Dashboard + +Create a comprehensive dashboard: + +```json +{ + "dashboard": { + "title": "LlamaStack Monitoring", + "panels": [ + { + "title": "Request Rate", + "type": "graph", + "targets": [ + { + "expr": "rate(llamastack_requests_total[5m])", + "legendFormat": "{{instance}}" + } + ] + }, + { + "title": "Response Time", + "type": "graph", + "targets": [ + { + "expr": "histogram_quantile(0.95, rate(llamastack_request_duration_seconds_bucket[5m]))", + "legendFormat": "95th percentile" + } + ] + }, + { + "title": "Resource Usage", + "type": "graph", + "targets": [ + { + "expr": "rate(container_cpu_usage_seconds_total[5m])", + "legendFormat": "CPU" + }, + { + "expr": "container_memory_usage_bytes", + "legendFormat": "Memory" + } + ] + } + ] + } +} +``` + +## Alerting + +### Prometheus Alerts + +Define critical alerts: + +```yaml +groups: +- name: llamastack.rules + rules: + - alert: LlamaStackDown + expr: up{job="llamastack"} == 0 + for: 1m + labels: + severity: critical + annotations: + summary: "LlamaStack instance is down" + description: "LlamaStack instance {{ $labels.instance }} 
has been down for more than 1 minute." + + - alert: HighErrorRate + expr: rate(llamastack_requests_total{status=~"5.."}[5m]) > 0.1 + for: 5m + labels: + severity: warning + annotations: + summary: "High error rate detected" + description: "Error rate is {{ $value }} errors per second." + + - alert: HighLatency + expr: histogram_quantile(0.95, rate(llamastack_request_duration_seconds_bucket[5m])) > 2 + for: 5m + labels: + severity: warning + annotations: + summary: "High latency detected" + description: "95th percentile latency is {{ $value }} seconds." + + - alert: HighMemoryUsage + expr: container_memory_usage_bytes / container_spec_memory_limit_bytes > 0.9 + for: 5m + labels: + severity: warning + annotations: + summary: "High memory usage" + description: "Memory usage is above 90%." +``` + +### AlertManager Configuration + +Configure alert routing: + +```yaml +global: + smtp_smarthost: 'localhost:587' + smtp_from: 'alerts@example.com' + +route: + group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'web.hook' + +receivers: +- name: 'web.hook' + email_configs: + - to: 'admin@example.com' + subject: 'LlamaStack Alert: {{ .GroupLabels.alertname }}' + body: | + {{ range .Alerts }} + Alert: {{ .Annotations.summary }} + Description: {{ .Annotations.description }} + {{ end }} +``` + +## Health Checks + +### Liveness Probe + +Configure liveness probes: + +```yaml +spec: + containers: + - name: llamastack + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 +``` + +### Readiness Probe + +Configure readiness probes: + +```yaml +spec: + containers: + - name: llamastack + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 +``` + +## Performance Monitoring + +### Custom Metrics + +Expose custom application metrics: + +```python +# Example Python code for custom metrics +from prometheus_client import Counter, Histogram, Gauge + +REQUEST_COUNT = Counter('llamastack_requests_total', 'Total requests', ['method', 'endpoint']) +REQUEST_LATENCY = Histogram('llamastack_request_duration_seconds', 'Request latency') +ACTIVE_CONNECTIONS = Gauge('llamastack_active_connections', 'Active connections') + +# In your application code +REQUEST_COUNT.labels(method='POST', endpoint='/inference').inc() +REQUEST_LATENCY.observe(response_time) +ACTIVE_CONNECTIONS.set(current_connections) +``` + +### Distributed Tracing + +Set up distributed tracing with Jaeger: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: jaeger-config +data: + config.yaml: | + jaeger: + endpoint: "http://jaeger-collector:14268/api/traces" + service_name: "llamastack" + sampler: + type: "probabilistic" + param: 0.1 +``` + +## Monitoring Best Practices + +### Resource Monitoring + +Monitor these key resources: + +```bash +# CPU usage +kubectl top pods -l app=llamastack + +# Memory usage +kubectl top pods -l app=llamastack --containers + +# Storage usage +kubectl exec -it -- df -h + +# Network usage +kubectl exec -it -- netstat -i +``` + +### Log Analysis + +Analyze logs for issues: + +```bash +# Check error logs +kubectl logs -l app=llamastack | grep ERROR + +# Check recent logs +kubectl logs -l app=llamastack --since=1h + +# Follow logs in real-time +kubectl logs -f -l app=llamastack +``` + +## Troubleshooting Monitoring + +### Common Issues + +**Metrics Not Appearing:** +```bash +# Check ServiceMonitor +kubectl 
get servicemonitor + +# Check Prometheus targets +kubectl port-forward svc/prometheus 9090:9090 +# Visit http://localhost:9090/targets +``` + +**High Resource Usage:** +```bash +# Check resource limits +kubectl describe pod + +# Check node resources +kubectl describe node +``` + +**Alert Fatigue:** +```bash +# Review alert thresholds +kubectl get prometheusrule + +# Check alert history +kubectl logs -l app=alertmanager +``` + +## Next Steps + +- [Troubleshooting Guide](troubleshooting.md) +- [Scaling Guide](scaling.md) +- [Configure Storage](configure-storage.md) diff --git a/docs/content/how-to/scaling.md b/docs/content/how-to/scaling.md new file mode 100644 index 000000000..7a7819768 --- /dev/null +++ b/docs/content/how-to/scaling.md @@ -0,0 +1,326 @@ +# Scaling + +Learn how to scale your LlamaStack distributions for optimal performance and cost efficiency. + +## Scaling Overview + +LlamaStack supports both horizontal and vertical scaling: + +- **Horizontal Scaling**: Add more replicas +- **Vertical Scaling**: Increase resources per replica +- **Auto Scaling**: Automatic scaling based on metrics + +## Horizontal Scaling + +### Manual Scaling + +Scale replicas manually: + +```bash +# Scale to 3 replicas +kubectl patch llamastackdistribution my-llamastack \ + -p '{"spec":{"replicas":3}}' + +# Or edit the resource directly +kubectl edit llamastackdistribution my-llamastack +``` + +### Declarative Scaling + +Update your YAML configuration: + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: scaled-llamastack +spec: + image: llamastack/llamastack:latest + replicas: 5 # Scale to 5 replicas + resources: + requests: + cpu: "1" + memory: "2Gi" +``` + +## Vertical Scaling + +### Resource Adjustment + +Increase CPU and memory: + +```yaml +spec: + resources: + requests: + cpu: "2" # Increased from 1 + memory: "4Gi" # Increased from 2Gi + limits: + cpu: "4" # Increased from 2 + memory: "8Gi" # Increased from 4Gi +``` + +### GPU Scaling + +Add GPU resources: + +```yaml +spec: + resources: + requests: + nvidia.com/gpu: "1" + limits: + nvidia.com/gpu: "2" +``` + +## Auto Scaling + +### Horizontal Pod Autoscaler (HPA) + +Create an HPA for automatic scaling: + +```yaml +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: llamastack-hpa +spec: + scaleTargetRef: + apiVersion: llamastack.io/v1alpha1 + kind: LlamaStackDistribution + name: my-llamastack + minReplicas: 2 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 +``` + +### Vertical Pod Autoscaler (VPA) + +Enable automatic resource adjustment: + +```yaml +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: llamastack-vpa +spec: + targetRef: + apiVersion: llamastack.io/v1alpha1 + kind: LlamaStackDistribution + name: my-llamastack + updatePolicy: + updateMode: "Auto" + resourcePolicy: + containerPolicies: + - containerName: llamastack + maxAllowed: + cpu: "4" + memory: "8Gi" + minAllowed: + cpu: "100m" + memory: "128Mi" +``` + +## Performance Considerations + +### Load Balancing + +Configure load balancing for multiple replicas: + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: llamastack-service +spec: + selector: + app: my-llamastack + ports: + - port: 8080 + targetPort: 8080 + type: LoadBalancer + sessionAffinity: None # Round-robin +``` + +### Resource Requests vs 
Limits + +Best practices for resource configuration: + +```yaml +spec: + resources: + requests: + cpu: "1" # Guaranteed resources + memory: "2Gi" + limits: + cpu: "2" # Maximum allowed (2x requests) + memory: "4Gi" # Maximum allowed (2x requests) +``` + +## Monitoring Scaling + +### Scaling Metrics + +Monitor key scaling metrics: + +```bash +# Check HPA status +kubectl get hpa + +# Check resource usage +kubectl top pods -l app=my-llamastack + +# Check scaling events +kubectl describe hpa llamastack-hpa +``` + +### Custom Metrics + +Scale based on custom metrics: + +```yaml +metrics: +- type: Pods + pods: + metric: + name: requests_per_second + target: + type: AverageValue + averageValue: "100" +``` + +## Scaling Strategies + +### Blue-Green Scaling + +Deploy new version alongside old: + +```yaml +# Blue deployment (current) +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: llamastack-blue +spec: + image: llamastack/llamastack:v1.0 + replicas: 3 + +--- +# Green deployment (new) +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: llamastack-green +spec: + image: llamastack/llamastack:v1.1 + replicas: 3 +``` + +### Canary Scaling + +Gradual rollout with traffic splitting: + +```yaml +# Main deployment (90% traffic) +spec: + replicas: 9 + version: "stable" + +--- +# Canary deployment (10% traffic) +spec: + replicas: 1 + version: "canary" +``` + +## Cost Optimization + +### Spot Instances + +Use spot instances for cost savings: + +```yaml +spec: + nodeSelector: + node-type: "spot" + tolerations: + - key: "spot" + operator: "Equal" + value: "true" + effect: "NoSchedule" +``` + +### Scheduled Scaling + +Scale down during off-hours: + +```yaml +# CronJob for scaling down +apiVersion: batch/v1 +kind: CronJob +metadata: + name: scale-down-llamastack +spec: + schedule: "0 18 * * *" # 6 PM daily + jobTemplate: + spec: + template: + spec: + containers: + - name: kubectl + image: bitnami/kubectl + command: + - kubectl + - patch + - llamastackdistribution + - my-llamastack + - -p + - '{"spec":{"replicas":1}}' +``` + +## Troubleshooting Scaling + +### Common Issues + +**Pods Not Scaling:** +```bash +# Check HPA conditions +kubectl describe hpa llamastack-hpa + +# Check resource metrics +kubectl top nodes +kubectl top pods +``` + +**Resource Constraints:** +```bash +# Check node capacity +kubectl describe nodes + +# Check resource quotas +kubectl describe resourcequota +``` + +**Scaling Too Aggressive:** +```bash +# Adjust HPA behavior +kubectl patch hpa llamastack-hpa -p '{"spec":{"behavior":{"scaleUp":{"stabilizationWindowSeconds":300}}}}' +``` + +## Next Steps + +- [Monitoring Setup](monitoring.md) +- [Configure Storage](configure-storage.md) +- [Troubleshooting](troubleshooting.md) diff --git a/docs/content/how-to/troubleshooting.md b/docs/content/how-to/troubleshooting.md new file mode 100644 index 000000000..652353ada --- /dev/null +++ b/docs/content/how-to/troubleshooting.md @@ -0,0 +1,371 @@ +# Troubleshooting + +Common issues and solutions for LlamaStack Kubernetes Operator. 
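+
+ The commands below select pods with label filters such as `-l app=llamastack`. If a selector matches nothing in your cluster, check which labels the operator actually applied before assuming a pod is missing (a quick sanity check using the same command shown later in this guide):
+
+ ```bash
+ # Show pod labels so the -l selectors used below can be adjusted to your cluster
+ kubectl get pods --show-labels
+ ```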
+ +## Quick Diagnostics + +### Check Operator Status + +```bash +# Check operator pod +kubectl get pods -n llamastack-system + +# Check operator logs +kubectl logs -n llamastack-system -l app=llamastack-operator + +# Check CRD installation +kubectl get crd llamastackdistributions.llamastack.io +``` + +### Check Distribution Status + +```bash +# List all distributions +kubectl get llamastackdistribution + +# Check specific distribution +kubectl describe llamastackdistribution + +# Check distribution events +kubectl get events --field-selector involvedObject.name= +``` + +## Common Issues + +### 1. Operator Not Starting + +**Symptoms:** +- Operator pod in CrashLoopBackOff +- No CRDs created + +**Diagnosis:** +```bash +kubectl logs -n llamastack-system -l app=llamastack-operator +kubectl describe pod -n llamastack-system -l app=llamastack-operator +``` + +**Solutions:** +```bash +# Check RBAC permissions +kubectl auth can-i create llamastackdistributions --as=system:serviceaccount:llamastack-system:llamastack-operator + +# Reinstall operator +kubectl delete -f operator.yaml +kubectl apply -f operator.yaml + +# Check resource limits +kubectl describe pod -n llamastack-system -l app=llamastack-operator +``` + +### 2. Distribution Not Creating Pods + +**Symptoms:** +- LlamaStackDistribution exists but no pods created +- Status shows "Pending" or "Failed" + +**Diagnosis:** +```bash +kubectl describe llamastackdistribution +kubectl get events --field-selector involvedObject.name= +``` + +**Solutions:** +```bash +# Check image availability +kubectl run test --image= --dry-run=client + +# Check resource quotas +kubectl describe resourcequota + +# Check node capacity +kubectl describe nodes +``` + +### 3. Pods Failing to Start + +**Symptoms:** +- Pods in CrashLoopBackOff or Error state +- Container exits immediately + +**Diagnosis:** +```bash +kubectl logs +kubectl describe pod +kubectl get events --field-selector involvedObject.name= +``` + +**Solutions:** +```bash +# Check image pull secrets +kubectl get secrets + +# Check resource limits +kubectl describe pod + +# Check volume mounts +kubectl exec -it -- ls -la /mnt +``` + +### 4. Storage Issues + +**Symptoms:** +- PVCs stuck in Pending +- Pods can't mount volumes +- Out of disk space + +**Diagnosis:** +```bash +kubectl get pvc +kubectl describe pvc +kubectl get storageclass +``` + +**Solutions:** +```bash +# Check storage class +kubectl describe storageclass + +# Check available storage +kubectl get nodes -o custom-columns=NAME:.metadata.name,CAPACITY:.status.capacity.storage + +# Expand volume (if supported) +kubectl patch pvc -p '{"spec":{"resources":{"requests":{"storage":"100Gi"}}}}' +``` + +### 5. 
Network Connectivity Issues + +**Symptoms:** +- Services not accessible +- Pods can't communicate +- External traffic not reaching pods + +**Diagnosis:** +```bash +kubectl get svc +kubectl describe svc +kubectl get endpoints +``` + +**Solutions:** +```bash +# Check service selector +kubectl get pods --show-labels +kubectl describe svc + +# Test connectivity +kubectl exec -it -- wget -qO- http://:8080/health + +# Check network policies +kubectl get networkpolicy +``` + +## Performance Issues + +### High CPU Usage + +**Diagnosis:** +```bash +kubectl top pods -l app=llamastack +kubectl exec -it -- top +``` + +**Solutions:** +```bash +# Increase CPU limits +kubectl patch llamastackdistribution -p '{"spec":{"resources":{"limits":{"cpu":"4"}}}}' + +# Scale horizontally +kubectl patch llamastackdistribution -p '{"spec":{"replicas":3}}' +``` + +### High Memory Usage + +**Diagnosis:** +```bash +kubectl top pods -l app=llamastack --containers +kubectl exec -it -- free -h +``` + +**Solutions:** +```bash +# Increase memory limits +kubectl patch llamastackdistribution -p '{"spec":{"resources":{"limits":{"memory":"8Gi"}}}}' + +# Check for memory leaks +kubectl exec -it -- ps aux --sort=-%mem +``` + +### Slow Response Times + +**Diagnosis:** +```bash +# Check application logs +kubectl logs -l app=llamastack | grep -i latency + +# Test response time +kubectl exec -it -- curl -w "@curl-format.txt" http://localhost:8080/health +``` + +**Solutions:** +```bash +# Optimize resource allocation +kubectl patch llamastackdistribution -p '{"spec":{"resources":{"requests":{"cpu":"2","memory":"4Gi"}}}}' + +# Enable caching +kubectl patch llamastackdistribution -p '{"spec":{"config":{"cache":{"enabled":true}}}}' +``` + +## Debugging Tools + +### Log Analysis + +```bash +# Get all logs +kubectl logs -l app=llamastack --all-containers=true + +# Follow logs +kubectl logs -f -l app=llamastack + +# Get previous container logs +kubectl logs --previous + +# Search for errors +kubectl logs -l app=llamastack | grep -i error +``` + +### Resource Monitoring + +```bash +# Check resource usage +kubectl top pods -l app=llamastack +kubectl top nodes + +# Check resource limits +kubectl describe pod | grep -A 5 "Limits\|Requests" + +# Check resource quotas +kubectl describe resourcequota +``` + +### Network Debugging + +```bash +# Test DNS resolution +kubectl exec -it -- nslookup kubernetes.default + +# Test service connectivity +kubectl exec -it -- telnet 8080 + +# Check iptables rules +kubectl exec -it -- iptables -L +``` + +## Advanced Debugging + +### Debug Container + +Run a debug container in the same network namespace: + +```bash +kubectl debug -it --image=nicolaka/netshoot +``` + +### Port Forwarding + +Access services directly: + +```bash +# Forward to pod +kubectl port-forward 8080:8080 + +# Forward to service +kubectl port-forward svc/ 8080:8080 +``` + +### Exec into Container + +Access container shell: + +```bash +# Get shell access +kubectl exec -it -- /bin/bash + +# Run specific commands +kubectl exec -- ps aux +kubectl exec -- netstat -tulpn +``` + +## Configuration Issues + +### Invalid YAML + +**Symptoms:** +- kubectl apply fails +- Validation errors + +**Solutions:** +```bash +# Validate YAML syntax +kubectl apply --dry-run=client -f distribution.yaml + +# Check API version +kubectl api-resources | grep llamastack + +# Validate against schema +kubectl explain llamastackdistribution.spec +``` + +### Missing Dependencies + +**Symptoms:** +- Operator fails to start +- Missing CRDs + +**Solutions:** +```bash +# Install 
CRDs +kubectl apply -f https://raw.githubusercontent.com/llamastack/llama-stack-k8s-operator/main/config/crd/bases/llamastack.io_llamastackdistributions.yaml + +# Check operator dependencies +kubectl get deployment -n llamastack-system +``` + +## Getting Help + +### Collect Debug Information + +```bash +#!/bin/bash +# Debug information collection script + +echo "=== Operator Status ===" +kubectl get pods -n llamastack-system +kubectl logs -n llamastack-system -l app=llamastack-operator --tail=100 + +echo "=== Distributions ===" +kubectl get llamastackdistribution -o wide +kubectl describe llamastackdistribution + +echo "=== Pods ===" +kubectl get pods -l app=llamastack -o wide +kubectl describe pods -l app=llamastack + +echo "=== Events ===" +kubectl get events --sort-by=.metadata.creationTimestamp + +echo "=== Resources ===" +kubectl top nodes +kubectl top pods -l app=llamastack +``` + +### Support Channels + +- **GitHub Issues**: [Report bugs and feature requests](https://github.com/llamastack/llama-stack-k8s-operator/issues) +- **Documentation**: [Official documentation](https://llamastack-k8s-operator.pages.dev) +- **Community**: [Join the discussion](https://github.com/llamastack/llama-stack-k8s-operator/discussions) + +## Next Steps + +- [Monitoring Setup](monitoring.md) +- [Scaling Guide](scaling.md) +- [Configure Storage](configure-storage.md) diff --git a/docs/content/index.md b/docs/content/index.md new file mode 100644 index 000000000..59c7cc64e --- /dev/null +++ b/docs/content/index.md @@ -0,0 +1,130 @@ +# LlamaStack Kubernetes Operator + +The LlamaStack Kubernetes Operator provides a simple and efficient way to deploy and manage LlamaStack distributions in Kubernetes clusters. + +## Overview + +LlamaStack is a comprehensive framework for building AI applications with Large Language Models (LLMs). This Kubernetes operator simplifies the deployment and management of LlamaStack distributions, providing: + +- **Easy Deployment**: Deploy LlamaStack with a single Kubernetes resource +- **Scalability**: Automatically scale LlamaStack instances based on demand +- **Storage Management**: Persistent storage for models and data +- **Configuration Management**: Flexible configuration options for different use cases +- **Monitoring**: Built-in observability and health checks + +## Quick Start + +Get started with the LlamaStack Operator in just a few steps: + +1. **Install the Operator** + ```bash + kubectl apply -f https://github.com/llamastack/llama-stack-k8s-operator/releases/latest/download/operator.yaml + ``` + +2. **Deploy a LlamaStack Instance** + ```yaml + apiVersion: llamastack.io/v1alpha1 + kind: LlamaStackDistribution + metadata: + name: my-llamastack + spec: + replicas: 1 + server: + distribution: + name: "starter" + containerSpec: + port: 8321 + resources: + requests: + memory: "2Gi" + cpu: "1" + ``` + +3. **Apply the Configuration** + ```bash + kubectl apply -f llamastack.yaml + ``` + +## Key Features + +### πŸš€ **Simple Deployment** +Deploy LlamaStack distributions with minimal configuration using Kubernetes-native resources. + +### πŸ“ˆ **Auto-scaling** +Automatically scale your LlamaStack instances based on resource utilization and demand. + +### πŸ’Ύ **Persistent Storage** +Built-in support for persistent storage to maintain models, cache, and application data. + +### πŸ”§ **Flexible Configuration** +Support for multiple LlamaStack distributions and custom container images. 
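+
+ For example, a distribution can be selected by name or, alternatively, by pointing the same field at a custom container image (the two options are mutually exclusive; the image value below is only an illustration -- see the Configuration Reference for details):
+
+ ```yaml
+ spec:
+   server:
+     distribution:
+       name: "starter"
+       # image: "myregistry.com/custom-llamastack:v1.0"  # use instead of name for a custom image
+ ```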
+ +### πŸ“Š **Observability** +Integrated monitoring, logging, and health checks for production deployments. + +### πŸ”’ **Security** +Security best practices with RBAC, network policies, and secure defaults. + +## Architecture + +```mermaid +graph TD + A[LlamaStackDistribution CRD] --> B[Operator Controller] + B --> C[Deployment] + B --> D[Service] + B --> E[ConfigMap] + B --> F[PersistentVolumeClaim] + + C --> G[LlamaStack Pod 1] + C --> H[LlamaStack Pod 2] + C --> I[LlamaStack Pod N] + + G --> J[Storage Volume] + H --> J + I --> J + + D --> K[Load Balancer] + K --> L[External Access] +``` + +## Use Cases + +### Development and Testing +- Quick setup for development environments +- Testing different LlamaStack configurations +- Prototyping AI applications + +### Production Deployments +- Scalable LlamaStack deployments +- High availability configurations +- Enterprise-grade security and monitoring + +### Multi-tenant Environments +- Isolated LlamaStack instances per team +- Resource quotas and limits +- Namespace-based separation + +## Getting Started + +Ready to get started? Check out our comprehensive guides: + +- [Installation Guide](getting-started/installation.md) - Install the operator in your cluster +- [Quick Start Tutorial](getting-started/quick-start.md) - Deploy your first LlamaStack instance +- [Configuration Guide](getting-started/configuration.md) - Learn about configuration options + +## Documentation + +- **[API Reference](reference/api.md)** - Complete API documentation +- **[How-to Guides](how-to/deploy-llamastack.md)** - Task-oriented guides +- **[Examples](examples/basic-deployment.md)** - Real-world configuration examples +- **[Contributing](contributing/development.md)** - Development and contribution guide + +## Community + +- **GitHub**: [llamastack/llama-stack-k8s-operator](https://github.com/llamastack/llama-stack-k8s-operator) +- **Issues**: [Report bugs and request features](https://github.com/llamastack/llama-stack-k8s-operator/issues) +- **Discussions**: [Community discussions](https://github.com/llamastack/llama-stack-k8s-operator/discussions) + +## License + +This project is licensed under the Apache License 2.0. See the [LICENSE](https://github.com/llamastack/llama-stack-k8s-operator/blob/main/LICENSE) file for details. 
diff --git a/docs/content/javascripts/extra.js b/docs/content/javascripts/extra.js new file mode 100644 index 000000000..c6d014845 --- /dev/null +++ b/docs/content/javascripts/extra.js @@ -0,0 +1,617 @@ +// Enhanced functionality for LlamaStack Operator Documentation + +document.addEventListener('DOMContentLoaded', function() { + // Initialize all enhancements + initializeCodeCopyButtons(); + initializeAPIExplorer(); + initializeSearchEnhancements(); + initializeNavigationEnhancements(); + initializeAccessibility(); + initializeAnalytics(); +}); + +/** + * Enhanced copy buttons for code blocks + */ +function initializeCodeCopyButtons() { + // Add copy success feedback + document.addEventListener('clipboard-success', function(e) { + const button = e.detail.trigger; + const originalText = button.textContent; + + button.textContent = 'Copied!'; + button.style.background = '#10b981'; + + setTimeout(() => { + button.textContent = originalText; + button.style.background = ''; + }, 2000); + }); + + // Add copy error feedback + document.addEventListener('clipboard-error', function(e) { + const button = e.detail.trigger; + const originalText = button.textContent; + + button.textContent = 'Failed'; + button.style.background = '#ef4444'; + + setTimeout(() => { + button.textContent = originalText; + button.style.background = ''; + }, 2000); + }); +} + +/** + * Interactive API Explorer + */ +function initializeAPIExplorer() { + // YAML validator for LlamaStackDistribution specs + const yamlInputs = document.querySelectorAll('.yaml-validator'); + + yamlInputs.forEach(input => { + const validateButton = document.createElement('button'); + validateButton.textContent = 'Validate YAML'; + validateButton.className = 'md-button md-button--primary yaml-validate-btn'; + + validateButton.addEventListener('click', function() { + validateYAML(input); + }); + + input.parentNode.insertBefore(validateButton, input.nextSibling); + }); + + // Add interactive examples + addInteractiveExamples(); +} + +/** + * Validate YAML content + */ +function validateYAML(input) { + const content = input.value; + const resultDiv = getOrCreateResultDiv(input); + + try { + // Basic YAML validation (you might want to use a proper YAML parser) + if (!content.trim()) { + throw new Error('Empty YAML content'); + } + + // Check for basic LlamaStackDistribution structure + if (!content.includes('apiVersion: llamastack.io/v1alpha1')) { + throw new Error('Missing required apiVersion'); + } + + if (!content.includes('kind: LlamaStackDistribution')) { + throw new Error('Missing required kind'); + } + + resultDiv.innerHTML = ` +
+ <div>
+ <strong>βœ… Valid YAML</strong>
+ <p>Your LlamaStackDistribution configuration appears to be valid.</p>
+ </div>
+ `;
+ resultDiv.className = 'validation-result success';
+
+ } catch (error) {
+ resultDiv.innerHTML = `
+ <div>
+ <strong>❌ Invalid YAML</strong>
+ <p>Error: ${error.message}</p>
+ </div>
+ `; + resultDiv.className = 'validation-result error'; + } +} + +/** + * Get or create result div for validation + */ +function getOrCreateResultDiv(input) { + let resultDiv = input.parentNode.querySelector('.validation-result'); + if (!resultDiv) { + resultDiv = document.createElement('div'); + resultDiv.className = 'validation-result'; + input.parentNode.appendChild(resultDiv); + } + return resultDiv; +} + +/** + * Add interactive examples + */ +function addInteractiveExamples() { + const examples = document.querySelectorAll('.interactive-example'); + + examples.forEach(example => { + const tryButton = document.createElement('button'); + tryButton.textContent = 'Try this example'; + tryButton.className = 'md-button try-example-btn'; + + tryButton.addEventListener('click', function() { + const codeBlock = example.querySelector('code'); + if (codeBlock) { + copyToClipboard(codeBlock.textContent); + showNotification('Example copied to clipboard!'); + } + }); + + example.appendChild(tryButton); + }); +} + +/** + * Enhanced search functionality + */ +function initializeSearchEnhancements() { + const searchInput = document.querySelector('.md-search__input'); + if (!searchInput) return; + + // Add search suggestions + const suggestionsDiv = document.createElement('div'); + suggestionsDiv.className = 'search-suggestions'; + suggestionsDiv.style.display = 'none'; + searchInput.parentNode.appendChild(suggestionsDiv); + + // Popular search terms + const popularSearches = [ + 'installation', + 'quick start', + 'API reference', + 'examples', + 'troubleshooting', + 'configuration', + 'scaling', + 'storage' + ]; + + searchInput.addEventListener('focus', function() { + if (!this.value) { + showSearchSuggestions(popularSearches, suggestionsDiv); + } + }); + + searchInput.addEventListener('blur', function() { + setTimeout(() => { + suggestionsDiv.style.display = 'none'; + }, 200); + }); + + // Search analytics + searchInput.addEventListener('input', function() { + if (this.value.length > 2) { + trackSearchQuery(this.value); + } + }); +} + +/** + * Show search suggestions + */ +function showSearchSuggestions(suggestions, container) { + container.innerHTML = suggestions.map(term => + `
<div class="search-suggestion" onclick="performSearch('${term}')">${term}</div>
` + ).join(''); + container.style.display = 'block'; +} + +/** + * Perform search + */ +function performSearch(term) { + const searchInput = document.querySelector('.md-search__input'); + if (searchInput) { + searchInput.value = term; + searchInput.dispatchEvent(new Event('input')); + searchInput.focus(); + } +} + +/** + * Navigation enhancements + */ +function initializeNavigationEnhancements() { + // Add breadcrumb navigation + addBreadcrumbs(); + + // Add "Edit this page" links + addEditLinks(); + + // Add page navigation (previous/next) + addPageNavigation(); + + // Smooth scrolling for anchor links + document.querySelectorAll('a[href^="#"]').forEach(anchor => { + anchor.addEventListener('click', function (e) { + e.preventDefault(); + const target = document.querySelector(this.getAttribute('href')); + if (target) { + target.scrollIntoView({ + behavior: 'smooth', + block: 'start' + }); + } + }); + }); +} + +/** + * Add breadcrumb navigation + */ +function addBreadcrumbs() { + const nav = document.querySelector('.md-nav--primary'); + if (!nav) return; + + const currentPath = window.location.pathname; + const pathParts = currentPath.split('/').filter(part => part); + + if (pathParts.length > 1) { + const breadcrumbContainer = document.createElement('nav'); + breadcrumbContainer.className = 'breadcrumb-nav'; + breadcrumbContainer.setAttribute('aria-label', 'Breadcrumb'); + + let breadcrumbHTML = ''; + breadcrumbContainer.innerHTML = breadcrumbHTML; + + const content = document.querySelector('.md-content'); + if (content) { + content.insertBefore(breadcrumbContainer, content.firstChild); + } + } +} + +/** + * Add edit links + */ +function addEditLinks() { + const repoUrl = 'https://github.com/llamastack/llama-stack-k8s-operator'; + const currentPath = window.location.pathname; + const editUrl = `${repoUrl}/edit/main/docs/content${currentPath.replace(/\/$/, '')}.md`; + + const editLink = document.createElement('a'); + editLink.href = editUrl; + editLink.textContent = '✏️ Edit this page'; + editLink.className = 'edit-link'; + editLink.target = '_blank'; + editLink.rel = 'noopener noreferrer'; + + const article = document.querySelector('article'); + if (article) { + article.appendChild(editLink); + } +} + +/** + * Add page navigation + */ +function addPageNavigation() { + // This would require parsing the navigation structure + // Implementation depends on MkDocs navigation data +} + +/** + * Accessibility enhancements + */ +function initializeAccessibility() { + // Add skip to content link + const skipLink = document.createElement('a'); + skipLink.href = '#main-content'; + skipLink.textContent = 'Skip to main content'; + skipLink.className = 'skip-link'; + document.body.insertBefore(skipLink, document.body.firstChild); + + // Mark main content + const mainContent = document.querySelector('.md-content'); + if (mainContent) { + mainContent.id = 'main-content'; + } + + // Enhance keyboard navigation + document.addEventListener('keydown', function(e) { + // Alt + S for search + if (e.altKey && e.key === 's') { + e.preventDefault(); + const searchInput = document.querySelector('.md-search__input'); + if (searchInput) { + searchInput.focus(); + } + } + + // Alt + H for home + if (e.altKey && e.key === 'h') { + e.preventDefault(); + window.location.href = '/'; + } + }); + + // Add ARIA labels to interactive elements + document.querySelectorAll('.md-nav__link').forEach(link => { + if (!link.getAttribute('aria-label')) { + link.setAttribute('aria-label', `Navigate to ${link.textContent.trim()}`); + } 
+ }); +} + +/** + * Analytics and tracking + */ +function initializeAnalytics() { + // Track page views + trackPageView(); + + // Track user interactions + trackUserInteractions(); + + // Track performance metrics + trackPerformanceMetrics(); +} + +/** + * Track page view + */ +function trackPageView() { + // Implementation depends on your analytics provider + console.log('Page view tracked:', window.location.pathname); +} + +/** + * Track search queries + */ +function trackSearchQuery(query) { + // Implementation depends on your analytics provider + console.log('Search query tracked:', query); +} + +/** + * Track user interactions + */ +function trackUserInteractions() { + // Track copy button clicks + document.addEventListener('click', function(e) { + if (e.target.classList.contains('md-clipboard')) { + console.log('Copy button clicked'); + } + + if (e.target.classList.contains('try-example-btn')) { + console.log('Try example button clicked'); + } + }); +} + +/** + * Track performance metrics + */ +function trackPerformanceMetrics() { + // Track page load time + window.addEventListener('load', function() { + const loadTime = performance.now(); + console.log('Page load time:', loadTime); + }); +} + +/** + * Utility functions + */ + +/** + * Copy text to clipboard + */ +function copyToClipboard(text) { + if (navigator.clipboard) { + navigator.clipboard.writeText(text); + } else { + // Fallback for older browsers + const textArea = document.createElement('textarea'); + textArea.value = text; + document.body.appendChild(textArea); + textArea.select(); + document.execCommand('copy'); + document.body.removeChild(textArea); + } +} + +/** + * Show notification + */ +function showNotification(message, type = 'info') { + const notification = document.createElement('div'); + notification.className = `notification notification--${type}`; + notification.textContent = message; + + document.body.appendChild(notification); + + // Animate in + setTimeout(() => { + notification.classList.add('notification--visible'); + }, 100); + + // Remove after 3 seconds + setTimeout(() => { + notification.classList.remove('notification--visible'); + setTimeout(() => { + document.body.removeChild(notification); + }, 300); + }, 3000); +} + +/** + * Debounce function + */ +function debounce(func, wait) { + let timeout; + return function executedFunction(...args) { + const later = () => { + clearTimeout(timeout); + func(...args); + }; + clearTimeout(timeout); + timeout = setTimeout(later, wait); + }; +} + +/** + * Throttle function + */ +function throttle(func, limit) { + let inThrottle; + return function() { + const args = arguments; + const context = this; + if (!inThrottle) { + func.apply(context, args); + inThrottle = true; + setTimeout(() => inThrottle = false, limit); + } + }; +} + +// Add CSS for notifications and other dynamic elements +const dynamicStyles = ` + .notification { + position: fixed; + top: 20px; + right: 20px; + padding: 12px 20px; + border-radius: 6px; + color: white; + font-weight: 500; + z-index: 1000; + transform: translateX(100%); + transition: transform 0.3s ease; + } + + .notification--visible { + transform: translateX(0); + } + + .notification--info { + background: #2563eb; + } + + .notification--success { + background: #10b981; + } + + .notification--error { + background: #ef4444; + } + + .breadcrumb-nav { + margin-bottom: 2rem; + padding: 1rem 0; + border-bottom: 1px solid var(--md-default-fg-color--lightest); + } + + .breadcrumb { + list-style: none; + padding: 0; + margin: 0; + display: 
flex; + align-items: center; + gap: 0.5rem; + } + + .breadcrumb li:not(:last-child)::after { + content: "β€Ί"; + margin-left: 0.5rem; + color: var(--md-default-fg-color--light); + } + + .breadcrumb a { + color: var(--md-default-fg-color--light); + text-decoration: none; + } + + .breadcrumb a:hover { + color: var(--md-primary-fg-color); + } + + .edit-link { + display: inline-block; + margin-top: 2rem; + padding: 0.5rem 1rem; + background: var(--md-default-fg-color--lightest); + border-radius: 4px; + text-decoration: none; + font-size: 0.9em; + } + + .edit-link:hover { + background: var(--md-default-fg-color--lighter); + } + + .search-suggestions { + position: absolute; + top: 100%; + left: 0; + right: 0; + background: var(--md-default-bg-color); + border: 1px solid var(--md-default-fg-color--lightest); + border-radius: 4px; + box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); + z-index: 100; + } + + .search-suggestion { + padding: 0.5rem 1rem; + cursor: pointer; + border-bottom: 1px solid var(--md-default-fg-color--lightest); + } + + .search-suggestion:hover { + background: var(--md-default-fg-color--lightest); + } + + .search-suggestion:last-child { + border-bottom: none; + } + + .validation-result { + margin-top: 1rem; + padding: 1rem; + border-radius: 4px; + } + + .validation-result.success { + background: rgba(16, 185, 129, 0.1); + border: 1px solid #10b981; + } + + .validation-result.error { + background: rgba(239, 68, 68, 0.1); + border: 1px solid #ef4444; + } + + .yaml-validate-btn { + margin-top: 0.5rem; + margin-bottom: 1rem; + } + + .try-example-btn { + margin-top: 1rem; + } +`; + +// Inject dynamic styles +const styleSheet = document.createElement('style'); +styleSheet.textContent = dynamicStyles; +document.head.appendChild(styleSheet); diff --git a/docs/content/reference/cli.md b/docs/content/reference/cli.md new file mode 100644 index 000000000..73fb4a9a1 --- /dev/null +++ b/docs/content/reference/cli.md @@ -0,0 +1,456 @@ +# CLI Reference + +Command-line interface reference for LlamaStack Kubernetes Operator. 
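+
+ This reference covers day-to-day `kubectl` operations against the operator's custom resources. Most examples need a distribution name, pod name, or namespace filled in; one option is to export the values from your own cluster once and reuse them (the names below are the illustrative ones used throughout this documentation):
+
+ ```bash
+ # Illustrative values -- replace with the resources in your cluster
+ export DIST_NAME=my-llamastack        # LlamaStackDistribution name
+ export OPERATOR_NS=llamastack-system  # namespace where the operator runs
+ ```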
+ +## kubectl Commands + +### Basic Operations + +#### List Distributions + +```bash +# List all LlamaStack distributions +kubectl get llamastackdistribution + +# List with additional details +kubectl get llamastackdistribution -o wide + +# List in all namespaces +kubectl get llamastackdistribution --all-namespaces +``` + +#### Describe Distribution + +```bash +# Get detailed information +kubectl describe llamastackdistribution + +# Get YAML output +kubectl get llamastackdistribution -o yaml + +# Get JSON output +kubectl get llamastackdistribution -o json +``` + +#### Create Distribution + +```bash +# Create from file +kubectl apply -f llamastack-distribution.yaml + +# Create from URL +kubectl apply -f https://example.com/llamastack.yaml + +# Dry run to validate +kubectl apply -f llamastack.yaml --dry-run=client +``` + +#### Update Distribution + +```bash +# Apply changes from file +kubectl apply -f llamastack-distribution.yaml + +# Edit directly +kubectl edit llamastackdistribution + +# Patch specific fields +kubectl patch llamastackdistribution -p '{"spec":{"replicas":3}}' +``` + +#### Delete Distribution + +```bash +# Delete by name +kubectl delete llamastackdistribution + +# Delete from file +kubectl delete -f llamastack-distribution.yaml + +# Delete all distributions +kubectl delete llamastackdistribution --all +``` + +### Advanced Operations + +#### Scale Distribution + +```bash +# Scale to specific replica count +kubectl scale llamastackdistribution --replicas=5 + +# Scale multiple distributions +kubectl scale llamastackdistribution --all --replicas=3 +``` + +#### Rollout Management + +```bash +# Check rollout status +kubectl rollout status deployment/ + +# Rollout history +kubectl rollout history deployment/ + +# Rollback to previous version +kubectl rollout undo deployment/ + +# Rollback to specific revision +kubectl rollout undo deployment/ --to-revision=2 +``` + +#### Resource Management + +```bash +# Get resource usage +kubectl top pods -l app= + +# Get node usage +kubectl top nodes + +# Describe resource quotas +kubectl describe resourcequota +``` + +## Operator Management + +### Installation + +```bash +# Install operator +kubectl apply -f https://github.com/llamastack/llama-stack-k8s-operator/releases/latest/download/operator.yaml + +# Install specific version +kubectl apply -f https://github.com/llamastack/llama-stack-k8s-operator/releases/download/v1.0.0/operator.yaml + +# Install from local file +kubectl apply -f operator.yaml +``` + +### Operator Status + +```bash +# Check operator pods +kubectl get pods -n llamastack-system + +# Check operator logs +kubectl logs -n llamastack-system -l app=llamastack-operator + +# Follow operator logs +kubectl logs -n llamastack-system -l app=llamastack-operator -f +``` + +### Operator Configuration + +```bash +# Get operator configuration +kubectl get configmap -n llamastack-system llamastack-config -o yaml + +# Update operator configuration +kubectl patch configmap -n llamastack-system llamastack-config -p '{"data":{"config.yaml":"..."}}' +``` + +## Debugging Commands + +### Pod Operations + +```bash +# List pods for a distribution +kubectl get pods -l app= + +# Get pod logs +kubectl logs + +# Follow pod logs +kubectl logs -f + +# Get previous container logs +kubectl logs --previous + +# Execute commands in pod +kubectl exec -it -- /bin/bash + +# Copy files to/from pod +kubectl cp : +kubectl cp : +``` + +### Service Operations + +```bash +# List services +kubectl get svc -l app= + +# Describe service +kubectl describe svc + +# Get 
service endpoints +kubectl get endpoints + +# Port forward to service +kubectl port-forward svc/ 8080:8080 +``` + +### Network Debugging + +```bash +# Test DNS resolution +kubectl exec -it -- nslookup + +# Test connectivity +kubectl exec -it -- curl http://:8080/health + +# Check network policies +kubectl get networkpolicy +``` + +### Storage Operations + +```bash +# List persistent volume claims +kubectl get pvc -l app= + +# Describe PVC +kubectl describe pvc + +# Check storage usage +kubectl exec -it -- df -h + +# List storage classes +kubectl get storageclass +``` + +## Monitoring Commands + +### Resource Monitoring + +```bash +# Get resource usage for pods +kubectl top pods -l app= + +# Get resource usage for nodes +kubectl top nodes + +# Get resource usage with containers +kubectl top pods -l app= --containers +``` + +### Event Monitoring + +```bash +# Get events for a distribution +kubectl get events --field-selector involvedObject.name= + +# Get recent events +kubectl get events --sort-by=.metadata.creationTimestamp + +# Watch events in real-time +kubectl get events --watch +``` + +### Metrics Access + +```bash +# Port forward to metrics endpoint +kubectl port-forward 9090:9090 + +# Access metrics via curl +kubectl exec -it -- curl http://localhost:9090/metrics +``` + +## Configuration Management + +### ConfigMaps + +```bash +# Create ConfigMap from file +kubectl create configmap llamastack-config --from-file=config.yaml + +# Create ConfigMap from literal values +kubectl create configmap llamastack-config --from-literal=key1=value1 + +# Update ConfigMap +kubectl patch configmap llamastack-config -p '{"data":{"key":"new-value"}}' + +# Get ConfigMap +kubectl get configmap llamastack-config -o yaml +``` + +### Secrets + +```bash +# Create secret from file +kubectl create secret generic llamastack-secret --from-file=secret.txt + +# Create secret from literal values +kubectl create secret generic llamastack-secret --from-literal=password=secret123 + +# Get secret (base64 encoded) +kubectl get secret llamastack-secret -o yaml + +# Decode secret +kubectl get secret llamastack-secret -o jsonpath='{.data.password}' | base64 -d +``` + +## Backup and Recovery + +### Backup + +```bash +# Backup distribution configuration +kubectl get llamastackdistribution -o yaml > backup.yaml + +# Backup all distributions +kubectl get llamastackdistribution -o yaml > all-distributions-backup.yaml + +# Create volume snapshot +kubectl create volumesnapshot --from-pvc= +``` + +### Recovery + +```bash +# Restore from backup +kubectl apply -f backup.yaml + +# Restore from volume snapshot +kubectl apply -f - < + kind: VolumeSnapshot + apiGroup: snapshot.storage.k8s.io + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 50Gi +EOF +``` + +## Useful Aliases + +Add these to your shell configuration: + +```bash +# Basic aliases +alias k='kubectl' +alias kgd='kubectl get llamastackdistribution' +alias kdd='kubectl describe llamastackdistribution' +alias ked='kubectl edit llamastackdistribution' + +# Pod aliases +alias kgp='kubectl get pods' +alias kdp='kubectl describe pod' +alias kl='kubectl logs' +alias kex='kubectl exec -it' + +# Service aliases +alias kgs='kubectl get svc' +alias kds='kubectl describe svc' +alias kpf='kubectl port-forward' + +# Monitoring aliases +alias ktop='kubectl top' +alias kge='kubectl get events --sort-by=.metadata.creationTimestamp' +``` + +## Bash Completion + +Enable kubectl completion: + +```bash +# For bash +echo 'source <(kubectl completion bash)' >>~/.bashrc + +# For 
zsh +echo 'source <(kubectl completion zsh)' >>~/.zshrc + +# For fish +kubectl completion fish | source +``` + +## Common Workflows + +### Development Workflow + +```bash +# 1. Create development distribution +kubectl apply -f dev-llamastack.yaml + +# 2. Check status +kubectl get llamastackdistribution dev-llamastack + +# 3. Check pods +kubectl get pods -l app=dev-llamastack + +# 4. View logs +kubectl logs -f -l app=dev-llamastack + +# 5. Test connectivity +kubectl port-forward svc/dev-llamastack 8080:8080 + +# 6. Update configuration +kubectl edit llamastackdistribution dev-llamastack + +# 7. Clean up +kubectl delete llamastackdistribution dev-llamastack +``` + +### Production Deployment + +```bash +# 1. Validate configuration +kubectl apply -f prod-llamastack.yaml --dry-run=client + +# 2. Deploy +kubectl apply -f prod-llamastack.yaml + +# 3. Monitor rollout +kubectl rollout status deployment/prod-llamastack + +# 4. Verify health +kubectl get pods -l app=prod-llamastack +kubectl logs -l app=prod-llamastack | grep "Ready" + +# 5. Scale if needed +kubectl scale llamastackdistribution prod-llamastack --replicas=5 + +# 6. Monitor metrics +kubectl top pods -l app=prod-llamastack +``` + +### Troubleshooting Workflow + +```bash +# 1. Check distribution status +kubectl describe llamastackdistribution + +# 2. Check pod status +kubectl get pods -l app= +kubectl describe pod + +# 3. Check logs +kubectl logs +kubectl logs --previous + +# 4. Check events +kubectl get events --field-selector involvedObject.name= + +# 5. Debug network +kubectl exec -it -- curl http://localhost:8080/health + +# 6. Check resources +kubectl top pods -l app= +kubectl describe node +``` + +## Next Steps + +- [API Reference](api.md) +- [Configuration Reference](configuration.md) +- [Troubleshooting Guide](../how-to/troubleshooting.md) diff --git a/docs/content/reference/configuration.md b/docs/content/reference/configuration.md new file mode 100644 index 000000000..befc08ce8 --- /dev/null +++ b/docs/content/reference/configuration.md @@ -0,0 +1,513 @@ +# Configuration Reference + +Complete reference for configuring LlamaStack Kubernetes Operator based on the actual API. + +## LlamaStackDistribution Specification + +### Basic Structure + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: string + namespace: string +spec: + replicas: integer # Default: 1 + server: + distribution: + # Either name OR image (mutually exclusive) + name: string # Distribution name from supported distributions + image: string # Direct container image reference + containerSpec: + name: string # Default: "llama-stack" + port: integer # Default: 8321 + resources: + requests: + cpu: string + memory: string + limits: + cpu: string + memory: string + env: + - name: string + value: string + podOverrides: # Optional pod-level customization + volumes: + - name: string + # ... 
volume spec + volumeMounts: + - name: string + mountPath: string + storage: # Optional persistent storage + size: string # Default: "10Gi" + mountPath: string # Default: "/.llama" +``` + +## Core Configuration + +### Distribution Configuration + +You can specify either a distribution name OR a direct image reference: + +```yaml +# Option 1: Use a named distribution (recommended) +spec: + server: + distribution: + name: "starter" # Maps to supported distributions + +# Option 2: Use a direct image +spec: + server: + distribution: + image: "llamastack/llamastack:latest" +``` + +### Supported Distribution Names + +The operator supports the following **7 pre-configured distributions**: + +| Distribution Name | Image | Description | +|-------------------|-------|-------------| +| `starter` | `docker.io/llamastack/distribution-starter:latest` | **Recommended default** - General purpose LlamaStack distribution | +| `ollama` | `docker.io/llamastack/distribution-ollama:latest` | Ollama-based distribution for local inference | +| `bedrock` | `docker.io/llamastack/distribution-bedrock:latest` | AWS Bedrock distribution for cloud-based models | +| `remote-vllm` | `docker.io/llamastack/distribution-remote-vllm:latest` | Remote vLLM server integration | +| `tgi` | `docker.io/llamastack/distribution-tgi:latest` | Hugging Face Text Generation Inference | +| `together` | `docker.io/llamastack/distribution-together:latest` | Together AI API integration | +| `vllm-gpu` | `docker.io/llamastack/distribution-vllm-gpu:latest` | High-performance GPU inference with vLLM | +| `remote-vllm` | `docker.io/llamastack/distribution-remote-vllm:latest` | Remote vLLM distribution | +| `sambanova` | `docker.io/llamastack/distribution-sambanova:latest` | SambaNova distribution | +| `tgi` | `docker.io/llamastack/distribution-tgi:latest` | Text Generation Inference distribution | +| `together` | `docker.io/llamastack/distribution-together:latest` | Together AI distribution | +| `vllm-gpu` | `docker.io/llamastack/distribution-vllm-gpu:latest` | vLLM GPU distribution | +| `watsonx` | `docker.io/llamastack/distribution-watsonx:latest` | IBM watsonx distribution | +| `fireworks` | `docker.io/llamastack/distribution-fireworks:latest` | Fireworks AI distribution | + +**Examples:** + +```yaml +# Ollama distribution +spec: + server: + distribution: + name: "ollama" + +# Hugging Face Endpoint +spec: + server: + distribution: + name: "hf-endpoint" + +# NVIDIA distribution +spec: + server: + distribution: + name: "nvidia" + +# vLLM GPU distribution +spec: + server: + distribution: + name: "vllm-gpu" +``` + +### Replica Configuration + +```yaml +spec: + replicas: 3 # Default: 1 +``` + +### Container Configuration + +```yaml +spec: + server: + containerSpec: + name: "llama-stack" # Default container name + port: 8321 # Default port + resources: + requests: + cpu: "1" + memory: "2Gi" + limits: + cpu: "2" + memory: "4Gi" + env: + - name: "INFERENCE_MODEL" + value: "llama2-7b" + - name: "LOG_LEVEL" + value: "INFO" +``` + +## Storage Configuration + +### Basic Storage + +```yaml +spec: + server: + storage: + size: "50Gi" # Default: "10Gi" + mountPath: "/.llama" # Default mount path +``` + +### Custom Mount Path + +```yaml +spec: + server: + storage: + size: "100Gi" + mountPath: "/custom/path" +``` + +## Advanced Pod Customization + +### Additional Volumes + +```yaml +spec: + server: + podOverrides: + volumes: + - name: "model-cache" + emptyDir: + sizeLimit: "20Gi" + - name: "config" + configMap: + name: "llamastack-config" + volumeMounts: + - 
name: "model-cache" + mountPath: "/cache" + - name: "config" + mountPath: "/config" + readOnly: true +``` + +### ConfigMap Integration + +```yaml +spec: + server: + podOverrides: + volumes: + - name: "llamastack-config" + configMap: + name: "my-llamastack-config" + volumeMounts: + - name: "llamastack-config" + mountPath: "/app/config" +``` + +## Configuration Examples + +### Minimal Configuration + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: simple-llamastack +spec: + server: + distribution: + name: "ollama" +``` + +### Development Configuration + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: llamastack-dev +spec: + replicas: 1 + server: + distribution: + image: "llamastack/llamastack:latest" + containerSpec: + port: 8321 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1" + memory: "2Gi" + env: + - name: "LOG_LEVEL" + value: "DEBUG" + storage: + size: "20Gi" +``` + +### Production Configuration + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: llamastack-prod +spec: + replicas: 3 + server: + distribution: + image: "llamastack/llamastack:v1.0.0" + containerSpec: + name: "llama-stack" + port: 8321 + resources: + requests: + cpu: "2" + memory: "4Gi" + limits: + cpu: "4" + memory: "8Gi" + env: + - name: "INFERENCE_MODEL" + value: "llama2-70b" + - name: "MAX_WORKERS" + value: "4" + storage: + size: "500Gi" + mountPath: "/.llama" + podOverrides: + volumes: + - name: "model-cache" + emptyDir: + sizeLimit: "100Gi" + volumeMounts: + - name: "model-cache" + mountPath: "/cache" +``` + +### Custom Image with Configuration + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: custom-llamastack +spec: + replicas: 2 + server: + distribution: + image: "myregistry.com/custom-llamastack:v1.0" + containerSpec: + port: 8321 + resources: + requests: + cpu: "1" + memory: "2Gi" + limits: + cpu: "2" + memory: "4Gi" + env: + - name: "CUSTOM_CONFIG" + value: "/config/custom.yaml" + storage: + size: "100Gi" + podOverrides: + volumes: + - name: "custom-config" + configMap: + name: "llamastack-custom-config" + volumeMounts: + - name: "custom-config" + mountPath: "/config" + readOnly: true +``` + +### Distribution-Specific Examples + +#### Ollama Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: ollama-llamastack +spec: + replicas: 1 + server: + distribution: + name: "ollama" + containerSpec: + port: 8321 + env: + - name: OLLAMA_URL + value: "http://ollama-server-service.ollama-dist.svc.cluster.local:11434" + storage: + size: "20Gi" +``` + +#### Hugging Face Endpoint + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: hf-endpoint-llamastack +spec: + server: + distribution: + name: "hf-endpoint" + containerSpec: + env: + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-credentials + key: token + - name: HF_MODEL_ID + value: "meta-llama/Llama-2-7b-chat-hf" +``` + +#### NVIDIA Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: nvidia-llamastack +spec: + server: + distribution: + name: "nvidia" + containerSpec: + resources: + requests: + nvidia.com/gpu: "1" + limits: + nvidia.com/gpu: "1" + env: + - name: NVIDIA_API_KEY + valueFrom: + secretKeyRef: + name: nvidia-credentials + key: api-key +``` + +#### vLLM GPU Distribution + +```yaml +apiVersion: 
llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: vllm-gpu-llamastack +spec: + server: + distribution: + name: "vllm-gpu" + containerSpec: + resources: + requests: + nvidia.com/gpu: "1" + memory: "8Gi" + limits: + nvidia.com/gpu: "1" + memory: "16Gi" + env: + - name: MODEL_NAME + value: "meta-llama/Llama-2-7b-chat-hf" + storage: + size: "50Gi" +``` + +#### AWS Bedrock Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: bedrock-llamastack +spec: + server: + distribution: + name: "bedrock" + containerSpec: + env: + - name: AWS_REGION + value: "us-east-1" + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-credentials + key: access-key-id + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-credentials + key: secret-access-key +``` + +#### Together AI Distribution + +```yaml +apiVersion: llamastack.io/v1alpha1 +kind: LlamaStackDistribution +metadata: + name: together-llamastack +spec: + server: + distribution: + name: "together" + containerSpec: + env: + - name: TOGETHER_API_KEY + valueFrom: + secretKeyRef: + name: together-credentials + key: api-key + - name: MODEL_NAME + value: "meta-llama/Llama-2-7b-chat-hf" +``` + +## Status Information + +The operator provides status information about the distribution: + +```yaml +status: + version: "1.0.0" + ready: true + distributionConfig: + activeDistribution: "meta-reference" + providers: + - api: "inference" + provider_id: "meta-reference" + provider_type: "inference" + availableDistributions: + "meta-reference": "llamastack/llamastack:latest" +``` + +## Constants and Defaults + +The API defines several constants: + +- **Default Container Name**: `llama-stack` +- **Default Server Port**: `8321` +- **Default Service Port Name**: `http` +- **Default Mount Path**: `/.llama` +- **Default Storage Size**: `10Gi` +- **Default Label Key**: `app` +- **Default Label Value**: `llama-stack` + +## Validation Rules + +The API includes validation: + +- **Distribution**: Only one of `name` or `image` can be specified +- **Port**: Must be a valid port number +- **Resources**: Follow Kubernetes resource requirements format +- **Storage Size**: Must be a valid Kubernetes quantity + +## Next Steps + +- [API Reference](api.md) +- [CLI Reference](cli.md) +- [How-to Guides](../how-to/deploy-llamastack.md) diff --git a/docs/content/stylesheets/extra.css b/docs/content/stylesheets/extra.css new file mode 100644 index 000000000..45413ff29 --- /dev/null +++ b/docs/content/stylesheets/extra.css @@ -0,0 +1,322 @@ +/* Custom styles for LlamaStack Operator Documentation */ + +:root { + --llamastack-primary: #2563eb; + --llamastack-secondary: #64748b; + --llamastack-accent: #0ea5e9; + --llamastack-success: #10b981; + --llamastack-warning: #f59e0b; + --llamastack-error: #ef4444; +} + +/* Enhanced code blocks */ +.highlight { + border-radius: 8px; + overflow: hidden; +} + +.highlight pre { + margin: 0; + padding: 1rem; + background: var(--md-code-bg-color); +} + +/* Copy button styling */ +.md-clipboard { + border-radius: 4px; + transition: all 0.2s ease; +} + +.md-clipboard:hover { + background-color: var(--llamastack-primary); + color: white; +} + +/* Enhanced admonitions */ +.md-typeset .admonition { + border-radius: 8px; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); +} + +.md-typeset .admonition.note { + border-left: 4px solid var(--llamastack-primary); +} + +.md-typeset .admonition.tip { + border-left: 4px solid var(--llamastack-success); +} + +.md-typeset 
.admonition.warning { + border-left: 4px solid var(--llamastack-warning); +} + +.md-typeset .admonition.danger { + border-left: 4px solid var(--llamastack-error); +} + +/* API reference styling */ +.api-section { + margin: 2rem 0; + padding: 1.5rem; + border: 1px solid var(--md-default-fg-color--lightest); + border-radius: 8px; + background: var(--md-code-bg-color); +} + +.api-section h3 { + margin-top: 0; + color: var(--llamastack-primary); +} + +.api-field { + margin: 1rem 0; + padding: 0.5rem; + background: rgba(37, 99, 235, 0.05); + border-radius: 4px; +} + +.api-field-name { + font-family: var(--md-code-font); + font-weight: bold; + color: var(--llamastack-primary); +} + +.api-field-type { + font-family: var(--md-code-font); + color: var(--llamastack-secondary); + font-size: 0.9em; +} + +.api-field-description { + margin-top: 0.5rem; + color: var(--md-default-fg-color); +} + +/* Enhanced tables */ +.md-typeset table:not([class]) { + border-radius: 8px; + overflow: hidden; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); +} + +.md-typeset table:not([class]) th { + background: var(--llamastack-primary); + color: white; + font-weight: 600; +} + +.md-typeset table:not([class]) tr:nth-child(even) { + background: rgba(37, 99, 235, 0.05); +} + +/* Navigation enhancements */ +.md-nav__item--active > .md-nav__link { + color: var(--llamastack-primary); + font-weight: 600; +} + +.md-nav__link:hover { + color: var(--llamastack-accent); +} + +/* Search enhancements */ +.md-search__input { + border-radius: 8px; +} + +.md-search__input:focus { + border-color: var(--llamastack-primary); + box-shadow: 0 0 0 2px rgba(37, 99, 235, 0.2); +} + +/* Button styling */ +.md-button { + border-radius: 6px; + transition: all 0.2s ease; +} + +.md-button--primary { + background: var(--llamastack-primary); + border-color: var(--llamastack-primary); +} + +.md-button--primary:hover { + background: var(--llamastack-accent); + border-color: var(--llamastack-accent); + transform: translateY(-1px); +} + +/* Code snippet enhancements */ +.md-typeset code { + background: rgba(37, 99, 235, 0.1); + color: var(--llamastack-primary); + padding: 0.2em 0.4em; + border-radius: 4px; + font-size: 0.9em; +} + +/* Mermaid diagram styling */ +.mermaid { + text-align: center; + margin: 2rem 0; +} + +.mermaid svg { + max-width: 100%; + height: auto; +} + +/* Status badges */ +.status-badge { + display: inline-block; + padding: 0.25rem 0.5rem; + border-radius: 4px; + font-size: 0.8em; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.5px; +} + +.status-badge--stable { + background: var(--llamastack-success); + color: white; +} + +.status-badge--beta { + background: var(--llamastack-warning); + color: white; +} + +.status-badge--alpha { + background: var(--llamastack-error); + color: white; +} + +/* Command line styling */ +.command-line { + background: #1e293b; + color: #e2e8f0; + padding: 1rem; + border-radius: 8px; + font-family: var(--md-code-font); + overflow-x: auto; +} + +.command-line::before { + content: "$ "; + color: var(--llamastack-success); + font-weight: bold; +} + +/* Responsive adjustments */ +@media screen and (max-width: 768px) { + .api-section { + padding: 1rem; + margin: 1rem 0; + } + + .md-typeset table:not([class]) { + font-size: 0.9em; + } + + .highlight pre { + padding: 0.75rem; + font-size: 0.9em; + } +} + +/* Dark mode adjustments */ +[data-md-color-scheme="slate"] { + --llamastack-primary: #60a5fa; + --llamastack-secondary: #94a3b8; + --llamastack-accent: #38bdf8; +} + 
+[data-md-color-scheme="slate"] .api-section { + background: rgba(255, 255, 255, 0.05); + border-color: rgba(255, 255, 255, 0.1); +} + +[data-md-color-scheme="slate"] .api-field { + background: rgba(96, 165, 250, 0.1); +} + +[data-md-color-scheme="slate"] .md-typeset table:not([class]) tr:nth-child(even) { + background: rgba(96, 165, 250, 0.05); +} + +/* Print styles */ +@media print { + .md-header, + .md-sidebar, + .md-footer { + display: none !important; + } + + .md-main__inner { + margin: 0 !important; + } + + .md-content { + max-width: none !important; + } + + .highlight { + break-inside: avoid; + } + + .api-section { + break-inside: avoid; + box-shadow: none; + border: 1px solid #ccc; + } +} + +/* Accessibility improvements */ +.md-nav__link:focus, +.md-search__input:focus, +.md-button:focus { + outline: 2px solid var(--llamastack-primary); + outline-offset: 2px; +} + +/* Skip to content link */ +.skip-link { + position: absolute; + top: -40px; + left: 6px; + background: var(--llamastack-primary); + color: white; + padding: 8px; + text-decoration: none; + border-radius: 4px; + z-index: 1000; +} + +.skip-link:focus { + top: 6px; +} + +/* Loading animation for dynamic content */ +.loading { + display: inline-block; + width: 20px; + height: 20px; + border: 3px solid rgba(37, 99, 235, 0.3); + border-radius: 50%; + border-top-color: var(--llamastack-primary); + animation: spin 1s ease-in-out infinite; +} + +@keyframes spin { + to { transform: rotate(360deg); } +} + +/* Enhanced footer */ +.md-footer { + background: linear-gradient(135deg, var(--llamastack-primary), var(--llamastack-accent)); +} + +.md-footer-meta { + background: rgba(0, 0, 0, 0.1); +} diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml new file mode 100644 index 000000000..09675af65 --- /dev/null +++ b/docs/mkdocs.yml @@ -0,0 +1,118 @@ +site_name: LlamaStack Kubernetes Operator +site_description: Kubernetes operator for managing LlamaStack distributions +site_url: https://llamastack-k8s-operator.pages.dev +repo_url: https://github.com/llamastack/llama-stack-k8s-operator +repo_name: llamastack/llama-stack-k8s-operator + +# Directory configuration +docs_dir: content +site_dir: site + +# Theme configuration (Material Design like HyperShift) +theme: + name: material + palette: + - scheme: default + primary: blue + accent: blue + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - scheme: slate + primary: blue + accent: blue + toggle: + icon: material/brightness-4 + name: Switch to light mode + features: + - navigation.tabs + - navigation.sections + - navigation.expand + - navigation.top + - search.highlight + - search.share + - content.code.copy + - content.code.annotate + - navigation.footer + +# Navigation structure +nav: + - Home: index.md + - Getting Started: + - Installation: getting-started/installation.md + - Quick Start: getting-started/quick-start.md + - Understanding Distributions: getting-started/distributions.md + - Configuration: getting-started/configuration.md + - Distributions: + - Starter: distributions/starter.md + - vLLM: distributions/vllm.md + - Ollama: distributions/ollama.md + - AWS Bedrock: distributions/bedrock.md + - Text Generation Inference: distributions/tgi.md + - Together AI: distributions/together.md + - Bring Your Own: distributions/bring-your-own.md + - How-to Guides: + - Deploy LlamaStack: how-to/deploy-llamastack.md + - Configure Storage: how-to/configure-storage.md + - Scaling: how-to/scaling.md + - Monitoring: how-to/monitoring.md + - Troubleshooting: how-to/troubleshooting.md 
+ - Reference: + - API Reference: reference/api.md + - Configuration Reference: reference/configuration.md + - CLI Reference: reference/cli.md + - Examples: + - Basic Deployment: examples/basic-deployment.md + - Production Setup: examples/production-setup.md + - Custom Images: examples/custom-images.md + - Contributing: + - Development Guide: contributing/development.md + - Testing: contributing/testing.md + - Documentation: contributing/documentation.md + +# Plugins +plugins: + - search + - mermaid2 + +# Markdown extensions +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.tabbed: + alternate_style: true + - attr_list + - md_in_html + - toc: + permalink: true + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + +# Extra CSS and JavaScript +extra_css: + - stylesheets/extra.css + +extra_javascript: + - javascripts/extra.js + +# Footer +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/llamastack/llama-stack-k8s-operator + - icon: fontawesome/brands/docker + link: https://quay.io/repository/llamastack/llama-stack-k8s-operator + version: + provider: mike diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 000000000..6cc0f31b7 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,11 @@ +# MkDocs and theme +mkdocs>=1.5.0 +mkdocs-material>=9.4.0 + +# Plugins +mkdocs-mermaid2-plugin>=1.1.0 + +# Python dependencies for documentation generation +PyYAML>=6.0 +Jinja2>=3.1.0 +Markdown>=3.5.0 diff --git a/scripts/build-docs.sh b/scripts/build-docs.sh new file mode 100755 index 000000000..d4fbec679 --- /dev/null +++ b/scripts/build-docs.sh @@ -0,0 +1,189 @@ +#!/bin/bash +set -e + +# Build script for LlamaStack Operator Documentation +# This script builds the documentation site locally for development and testing + +echo "πŸš€ Building LlamaStack Operator Documentation..." + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Function to print colored output +print_status() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +print_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Check if we're in the right directory +if [ ! -f "Makefile" ] || [ ! -d "docs" ]; then + print_error "This script must be run from the repository root directory" + exit 1 +fi + +# Check dependencies +print_status "Checking dependencies..." + +# Check Go +if ! command -v go &> /dev/null; then + print_error "Go is not installed. Please install Go 1.21 or later." + exit 1 +fi + +GO_VERSION=$(go version | grep -oE 'go[0-9]+\.[0-9]+' | sed 's/go//') +REQUIRED_GO_VERSION="1.21" + +if [ "$(printf '%s\n' "$REQUIRED_GO_VERSION" "$GO_VERSION" | sort -V | head -n1)" != "$REQUIRED_GO_VERSION" ]; then + print_error "Go version $REQUIRED_GO_VERSION or later is required. Found: $GO_VERSION" + exit 1 +fi + +print_success "Go version $GO_VERSION found" + +# Check Python +if ! command -v python3 &> /dev/null; then + print_error "Python 3 is not installed. Please install Python 3.8 or later." 
+ exit 1 +fi + +PYTHON_VERSION=$(python3 --version | grep -oE '[0-9]+\.[0-9]+') +REQUIRED_PYTHON_VERSION="3.8" + +if [ "$(printf '%s\n' "$REQUIRED_PYTHON_VERSION" "$PYTHON_VERSION" | sort -V | head -n1)" != "$REQUIRED_PYTHON_VERSION" ]; then + print_error "Python version $REQUIRED_PYTHON_VERSION or later is required. Found: $PYTHON_VERSION" + exit 1 +fi + +print_success "Python version $PYTHON_VERSION found" + +# Check pip +if ! command -v pip3 &> /dev/null; then + print_error "pip3 is not installed. Please install pip3." + exit 1 +fi + +print_success "pip3 found" + +# Install Go tools +print_status "Installing Go documentation tools..." + +if ! make crd-ref-docs &> /dev/null; then + print_error "Failed to install crd-ref-docs" + exit 1 +fi + +if ! make gen-crd-api-reference-docs &> /dev/null; then + print_warning "gen-crd-api-reference-docs installation failed, continuing with crd-ref-docs only" +fi + +print_success "Go tools installed" + +# Install Python dependencies +print_status "Installing Python dependencies..." + +if [ -f "docs/requirements.txt" ]; then + if ! pip3 install -r docs/requirements.txt; then + print_error "Failed to install Python dependencies" + exit 1 + fi + print_success "Python dependencies installed" +else + print_error "docs/requirements.txt not found" + exit 1 +fi + +# Generate API documentation +print_status "Generating API documentation..." + +if ! make api-docs; then + print_error "Failed to generate API documentation" + exit 1 +fi + +print_success "API documentation generated" + +# Build documentation site +print_status "Building documentation site..." + +if ! make docs-build; then + print_error "Failed to build documentation site" + exit 1 +fi + +print_success "Documentation site built successfully" + +# Check if site directory exists and has content +if [ ! -d "docs/site" ]; then + print_error "Documentation site directory not found" + exit 1 +fi + +SITE_SIZE=$(du -sh docs/site | cut -f1) +print_success "Documentation site built (${SITE_SIZE})" + +# Validate the build +print_status "Validating build..." + +# Check for index.html +if [ ! -f "docs/site/index.html" ]; then + print_error "index.html not found in build output" + exit 1 +fi + +# Check for API documentation +if [ ! -f "docs/site/reference/api/index.html" ]; then + print_warning "API reference page not found, but continuing..." +fi + +# Check for assets +if [ ! -d "docs/site/assets" ]; then + print_warning "Assets directory not found, but continuing..." +fi + +print_success "Build validation completed" + +# Display build information +echo "" +echo "πŸ“Š Build Summary:" +echo "==================" +echo "πŸ“ Output directory: docs/site/" +echo "πŸ“ Site size: ${SITE_SIZE}" +echo "πŸ”— Local preview: http://localhost:8000" +echo "" + +# Offer to serve the site locally +read -p "🌐 Would you like to serve the documentation locally? (y/N): " -n 1 -r +echo +if [[ $REPLY =~ ^[Yy]$ ]]; then + print_status "Starting local server..." + echo "πŸ“– Documentation will be available at: http://localhost:8000" + echo "πŸ›‘ Press Ctrl+C to stop the server" + echo "" + + cd docs && python3 -m mkdocs serve --dev-addr 0.0.0.0:8000 +else + echo "" + print_success "Documentation build completed successfully!" 
+    echo ""
+    echo "To serve the documentation locally, run:"
+    echo "  cd docs && mkdocs serve"
+    echo ""
+    echo "Or use the Makefile target:"
+    echo "  make docs-serve"
+    echo ""
+fi
diff --git a/wrangler.toml b/wrangler.toml
new file mode 100644
index 000000000..eed412e6b
--- /dev/null
+++ b/wrangler.toml
@@ -0,0 +1,81 @@
+name = "llamastack-k8s-operator-docs"
+compatibility_date = "2024-01-15"
+
+[env.production]
+name = "llamastack-k8s-operator-docs"
+
+[env.preview]
+name = "llamastack-k8s-operator-docs-preview"
+
+# Build configuration
+[build]
+command = "make docs-build"
+cwd = "."
+watch_dir = ["docs", "api", "crd-ref-docs.config.yaml"]
+
+[build.environment_variables]
+GO_VERSION = "1.21"
+PYTHON_VERSION = "3.11"
+NODE_VERSION = "18"
+
+# Pages configuration
+[pages]
+build_output_dir = "docs/site"
+build_caching = true
+
+# Custom headers for security and performance
+[[pages.headers]]
+for = "/*"
+[pages.headers.values]
+X-Frame-Options = "DENY"
+X-Content-Type-Options = "nosniff"
+X-XSS-Protection = "1; mode=block"
+Referrer-Policy = "strict-origin-when-cross-origin"
+Permissions-Policy = "camera=(), microphone=(), geolocation=()"
+
+[[pages.headers]]
+for = "*.css"
+[pages.headers.values]
+Cache-Control = "public, max-age=31536000, immutable"
+
+[[pages.headers]]
+for = "*.js"
+[pages.headers.values]
+Cache-Control = "public, max-age=31536000, immutable"
+
+[[pages.headers]]
+for = "*.woff2"
+[pages.headers.values]
+Cache-Control = "public, max-age=31536000, immutable"
+
+[[pages.headers]]
+for = "*.png"
+[pages.headers.values]
+Cache-Control = "public, max-age=31536000, immutable"
+
+[[pages.headers]]
+for = "*.jpg"
+[pages.headers.values]
+Cache-Control = "public, max-age=31536000, immutable"
+
+# Redirects for better UX
+[[pages.redirects]]
+from = "/api"
+to = "/reference/api/"
+status = 301
+
+[[pages.redirects]]
+from = "/docs"
+to = "/"
+status = 301
+
+[[pages.redirects]]
+from = "/getting-started"
+to = "/getting-started/installation/"
+status = 301
+
+# Serve the custom 404 page for unmatched routes
+[[pages.redirects]]
+from = "/*"
+to = "/404.html"
+status = 404
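+
+# For reference, Cloudflare Pages can also read plain-text `_headers` and
+# `_redirects` files from the build output directory. If the
+# [[pages.headers]]/[[pages.redirects]] tables above are not honored by the
+# Pages build, equivalent rules (hypothetical files under docs/site, mirroring
+# the tables above) would look like:
+#
+#   docs/site/_redirects:
+#     /api              /reference/api/                 301
+#     /docs             /                               301
+#     /getting-started  /getting-started/installation/  301
+#
+#   docs/site/_headers:
+#     /*
+#       X-Frame-Options: DENY
+#       X-Content-Type-Options: nosniff
+#       Referrer-Policy: strict-origin-when-cross-origin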