Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,8 @@ dist
**/.DS_Store
/nix/
/.mcp.json
/test/embeddings/.clj-kondo/
/test/embeddings/.cpcache/
/test/embeddings/.direnv/
/test/embeddings/.envrc
/test/embeddings/.lsp/
6 changes: 5 additions & 1 deletion cmd/docker-mcp/commands/feature.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func featureListCommand(dockerCli command.Cli, features features.Features) *cobr
fmt.Println()

// Show all known features
knownFeatures := []string{"oauth-interceptor", "mcp-oauth-dcr", "dynamic-tools", "tool-name-prefix"}
knownFeatures := []string{"oauth-interceptor", "mcp-oauth-dcr", "dynamic-tools", "tool-name-prefix", "use-embeddings"}
if !features.IsRunningInDockerDesktop() {
knownFeatures = append(knownFeatures, "profiles")
}
Expand All @@ -177,6 +177,8 @@ func featureListCommand(dockerCli command.Cli, features features.Features) *cobr
fmt.Printf(" %-20s %s\n", "", "Enable profile management tools (docker mcp profile <subcommand>)")
case "tool-name-prefix":
fmt.Printf(" %-20s %s\n", "", "Prefix all tool names with server name to avoid conflicts")
case "use-embeddings":
fmt.Printf(" %-20s %s\n", "", "Enable vector similarity search for find-tools (requires OPENAI_API_KEY)")
}
fmt.Println()
}
Expand Down Expand Up @@ -215,6 +217,7 @@ func isFeatureEnabledFromConfig(configFile *configfile.ConfigFile, feature strin
// Features that are enabled by default
defaultEnabledFeatures := map[string]bool{
"mcp-oauth-dcr": true,
"dynamic-tools": true,
}

if configFile.Features == nil {
Expand Down Expand Up @@ -246,6 +249,7 @@ func isKnownFeature(feature string, features features.Features) bool {
"mcp-oauth-dcr",
"dynamic-tools",
"tool-name-prefix",
"use-embeddings",
}
if !features.IsRunningInDockerDesktop() {
knownFeatures = append(knownFeatures, "profiles")
Expand Down
4 changes: 2 additions & 2 deletions cmd/docker-mcp/commands/feature_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,15 @@ func TestIsFeatureEnabledDynamicTools(t *testing.T) {
Features: make(map[string]string),
}
enabled := isFeatureEnabledFromConfig(configFile, "dynamic-tools")
assert.False(t, enabled, "dynamic-tools should default to disabled when missing")
assert.True(t, enabled, "dynamic-tools should default to enabled when missing")
})

t.Run("nil features map", func(t *testing.T) {
configFile := &configfile.ConfigFile{
Features: nil,
}
enabled := isFeatureEnabledFromConfig(configFile, "dynamic-tools")
assert.False(t, enabled, "dynamic-tools should default to disabled when Features is nil")
assert.True(t, enabled, "dynamic-tools should default to enabled when Features is nil")
})
}

Expand Down
17 changes: 17 additions & 0 deletions cmd/docker-mcp/commands/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ func gatewayCommand(docker docker.Client, dockerCli command.Cli, features featur
// Check if tool name prefix feature is enabled
options.ToolNamePrefix = isToolNamePrefixFeatureEnabled(dockerCli)

// Check if use-embeddings feature is enabled
options.UseEmbeddings = isUseEmbeddingsFeatureEnabled(dockerCli)

// Update catalog URL based on mcp-oauth-dcr flag if using default Docker catalog URL
if len(options.CatalogPath) == 1 && (options.CatalogPath[0] == catalog.DockerCatalogURLV2 || options.CatalogPath[0] == catalog.DockerCatalogURLV3) {
options.CatalogPath[0] = catalog.GetDockerCatalogURL(options.McpOAuthDcrEnabled)
Expand Down Expand Up @@ -375,3 +378,17 @@ func setLegacyDefaults(options *gateway.Config) {
}
}
}

// isUseEmbeddingsFeatureEnabled reports whether the "use-embeddings" feature
// flag is set to "enabled" in the Docker CLI configuration file. It returns
// false when there is no config file, no features map, or no entry for the flag.
func isUseEmbeddingsFeatureEnabled(dockerCli command.Cli) bool {
	cfg := dockerCli.ConfigFile()
	if cfg == nil {
		return false
	}

	// Indexing a nil map or a missing key yields "", which never equals
	// "enabled", so no separate nil/existence checks are required.
	return cfg.Features["use-embeddings"] == "enabled"
}
297 changes: 297 additions & 0 deletions docs/feature-specs/embeddings-feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
---
marp: true
theme: default
paginate: true
---

# Vector Embeddings for MCP Gateway

**Semantic Search for MCP Servers and Tools**

---

## Overview

Added AI-powered semantic search capabilities to the MCP Gateway using OpenAI embeddings and a SQLite vector database (via the sqlite-vec extension).

**Key Features:**
- Vector similarity search for finding relevant MCP servers
- Feature flag system for opt-in usage
- Graceful fallback to keyword search

---

## Components

```mermaid
flowchart TD
A["MCP Gateway"] --> B["Embeddings Client"]
B --> C["Docker Container"]
C --> D["vector-db MCP Server"]
D --> E["SQLite-vec extension"]
E --> F["~/.docker/mcp/vectors.db"]
A --> G["OpenAI API"]
G --> H["text-embedding-3-small"]
style A fill:#e1f5ff
style B fill:#fff3e0
style D fill:#f3e5f5
```

---

## Feature Flag System

```bash
# Enable embeddings feature
docker mcp feature enable use-embeddings

# Set OpenAI API key
export OPENAI_API_KEY="sk-..."
```

**Configuration:**
- Feature: `use-embeddings` (disabled by default)
- Storage: `~/.docker/mcp/vectors.db`
- Model: OpenAI `text-embedding-3-small` (1536 dimensions)

---

## How mcp-find Works with Embeddings

```mermaid
sequenceDiagram
participant User
participant Gateway
participant OpenAI
participant VectorDB
participant Catalog
User->>Gateway: mcp-find "tools for github"
alt Embeddings Enabled
Gateway->>OpenAI: Generate embedding for query
OpenAI-->>Gateway: [0.014, -0.018, ...]
Gateway->>VectorDB: Search mcp-server-collection
VectorDB-->>Gateway: Top 5 similar servers
Gateway->>Catalog: Lookup server details
Catalog-->>Gateway: Server metadata
else Embeddings Disabled
Gateway->>Catalog: Keyword search (title/description)
Catalog-->>Gateway: Matching servers
end
Gateway-->>User: Server results
```

---

## Implementation: VectorDBClient

```go
type VectorDBClient struct {
cmd *exec.Cmd
client *mcp.Client
session *mcp.ClientSession
}

// Connects to Docker container running vector-db
func NewVectorDBClient(ctx context.Context, dataDir string) (*VectorDBClient, error) {
cmd := exec.CommandContext(ctx,
"docker", "run", "-i", "--rm",
"-v", fmt.Sprintf("%s:/data", dataDir),
"-e", "DB_PATH=/data/vectors.db",
"-e", "VECTOR_DIMENSION=1536",
"jimclark106/vector-db:latest",
)
// ... MCP client setup with CommandTransport
}
```

---

## Key Operations

```go
type SearchArgs struct {
Vector []float64 `json:"vector"`
CollectionName string `json:"collection_name,omitempty"`
ExcludeCollections []string `json:"exclude_collections,omitempty"`
Limit int `json:"limit,omitempty"`
}

func (c *VectorDBClient) SearchVectors(
ctx context.Context,
vector []float64,
options *SearchOptions,
) ([]SearchResult, error)
```
**Collections:**
- `Tool embeddings`: one vector collection per server
- `mcp-server-collection`: MCP server metadata

---

## Updated Tools

### mcp-find Tool
- **With Embeddings:** Semantic search against `mcp-server-collection`
- **Without Embeddings:** Traditional keyword matching
- Returns: Server name, description, secrets, config schema

### find-tools Tool
- **With Embeddings:** AI-powered server recommendations
- **Without Embeddings:** Error message with guidance
- Input: Natural language task description
- Output: Relevant servers from catalog

---

## Code Structure

```
pkg/gateway/
├── embeddings/
│ └── client.go # VectorDBClient implementation
├── findmcps.go # findServersByEmbedding()
├── findtools.go # generateEmbedding() + find-tools tool
└── dynamic_mcps.go # mcp-find tool + decodeArguments()

cmd/docker-mcp/commands/
├── feature.go # use-embeddings feature flag
└── gateway.go # Feature check + client init
```

---

## Embeddings Client Lifecycle

```mermaid
stateDiagram-v2
[*] --> Checking: Gateway starts
Checking --> Disabled: Feature flag off
Checking --> Disabled: No OPENAI_API_KEY
Checking --> Initializing: Feature enabled + API key set
Initializing --> Running: Docker container started
Initializing --> Disabled: Init failed (logged warning)
Running --> SearchVectors: find-tools called
Running --> AddVector: Store embeddings
Running --> ListCollections: Manage collections
Running --> [*]: Gateway stops
Disabled --> [*]: Gateway stops
```

---

## Data Flow: Semantic Search

```mermaid
flowchart LR
A[User Query] --> B[Generate Embedding]
B --> C[OpenAI API]
C --> D[1536-dim Vector]
D --> E[Search VectorDB]
E --> F[Top K Results]
F --> G[Extract Server Names]
G --> H[Lookup in Catalog]
H --> I[Return Server Info]
style A fill:#e3f2fd
style D fill:#fff3e0
style F fill:#f3e5f5
style I fill:#e8f5e9
```

---

## Benefits

✅ **Natural Language Search**
- "tools for github" → github server
- "database queries" → sqlite server

✅ **Better Discovery**
- Semantic matching vs exact keywords
- Handles synonyms and related concepts

✅ **Graceful Degradation**
- Falls back to keyword search if unavailable
- Gateway continues working without embeddings

✅ **Opt-in Feature**
- Disabled by default
- Requires explicit enablement + API key

---

## Testing & Validation

**Build:**
```bash
make docker-mcp
make lint
```

**Enable Feature:**
```bash
docker mcp feature enable use-embeddings
export OPENAI_API_KEY="sk-..."
docker mcp gateway run
```

**Test Search:**
```bash
# Via Claude Desktop or other MCP client
Find mcp tools that can help me ...
```

---

## Implementation Details

**Files Created:**
- `pkg/gateway/embeddings/client.go` (318 lines)
- `pkg/gateway/findmcps.go` (83 lines)

**Files Modified:**
- `pkg/gateway/dynamic_mcps.go` - Base64 decoding + mcp-find handler
- `pkg/gateway/findtools.go` - Embedding generation
- `pkg/gateway/run.go` - Client initialization
- `pkg/gateway/config.go` - UseEmbeddings flag
- `cmd/docker-mcp/commands/feature.go` - Feature registration
- `cmd/docker-mcp/commands/gateway.go` - Feature check

---

## Future Enhancements

🔮 **Possible Improvements:**
- Distribute embeddings with catalogs
- Support other embedding models and providers
- Automatic reindexing on catalog changes

---

## Summary

✨ **What We Built:**
- sqlite-vec integration
- LLM embeddings integration
- Semantic search for MCP servers

🎯 **Impact:**
- Better server/tool discovery
- Natural language queries
- Foundation for AI-powered gateway

---

## Questions?

**Documentation:**
- Feature flags: `docker mcp feature ls`
- sqlite-vec service image: `jimclark106/vector-db:latest`
- model: `text-embedding-3-small`

**Storage:**
- `~/.docker/mcp/vectors.db`
- `~/.docker/config.json` (feature flags)

**Code:**
- Branch: `slim/embeddings`
- Main files: `pkg/gateway/embeddings/`, `findmcps.go`
Loading
Loading