Skip to content

Commit 966f418

Browse files
Merge pull request #258 from docker/slim/embeddings
add find-tool embeddings
2 parents e71d249 + 6282c3b commit 966f418

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+8496
-838
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,8 @@ dist
88
**/.DS_Store
99
/nix/
1010
/.mcp.json
11+
/test/embeddings/.clj-kondo/
12+
/test/embeddings/.cpcache/
13+
/test/embeddings/.direnv/
14+
/test/embeddings/.envrc
15+
/test/embeddings/.lsp/

cmd/docker-mcp/commands/feature.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ func featureListCommand(dockerCli command.Cli, features features.Features) *cobr
153153
fmt.Println()
154154

155155
// Show all known features
156-
knownFeatures := []string{"oauth-interceptor", "mcp-oauth-dcr", "dynamic-tools", "tool-name-prefix"}
156+
knownFeatures := []string{"oauth-interceptor", "mcp-oauth-dcr", "dynamic-tools", "tool-name-prefix", "use-embeddings"}
157157
if !features.IsRunningInDockerDesktop() {
158158
knownFeatures = append(knownFeatures, "profiles")
159159
}
@@ -177,6 +177,8 @@ func featureListCommand(dockerCli command.Cli, features features.Features) *cobr
177177
fmt.Printf(" %-20s %s\n", "", "Enable profile management tools (docker mcp profile <subcommand>)")
178178
case "tool-name-prefix":
179179
fmt.Printf(" %-20s %s\n", "", "Prefix all tool names with server name to avoid conflicts")
180+
case "use-embeddings":
181+
fmt.Printf(" %-20s %s\n", "", "Enable vector similarity search for find-tools (requires OPENAI_API_KEY)")
180182
}
181183
fmt.Println()
182184
}
@@ -215,6 +217,7 @@ func isFeatureEnabledFromConfig(configFile *configfile.ConfigFile, feature strin
215217
// Features that are enabled by default
216218
defaultEnabledFeatures := map[string]bool{
217219
"mcp-oauth-dcr": true,
220+
"dynamic-tools": true,
218221
}
219222

220223
if configFile.Features == nil {
@@ -246,6 +249,7 @@ func isKnownFeature(feature string, features features.Features) bool {
246249
"mcp-oauth-dcr",
247250
"dynamic-tools",
248251
"tool-name-prefix",
252+
"use-embeddings",
249253
}
250254
if !features.IsRunningInDockerDesktop() {
251255
knownFeatures = append(knownFeatures, "profiles")

cmd/docker-mcp/commands/feature_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,15 @@ func TestIsFeatureEnabledDynamicTools(t *testing.T) {
6565
Features: make(map[string]string),
6666
}
6767
enabled := isFeatureEnabledFromConfig(configFile, "dynamic-tools")
68-
assert.False(t, enabled, "dynamic-tools should default to disabled when missing")
68+
assert.True(t, enabled, "dynamic-tools should default to enabled when missing")
6969
})
7070

7171
t.Run("nil features map", func(t *testing.T) {
7272
configFile := &configfile.ConfigFile{
7373
Features: nil,
7474
}
7575
enabled := isFeatureEnabledFromConfig(configFile, "dynamic-tools")
76-
assert.False(t, enabled, "dynamic-tools should default to disabled when Features is nil")
76+
assert.True(t, enabled, "dynamic-tools should default to enabled when Features is nil")
7777
})
7878
}
7979

cmd/docker-mcp/commands/gateway.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ func gatewayCommand(docker docker.Client, dockerCli command.Cli, features featur
9999
// Check if tool name prefix feature is enabled
100100
options.ToolNamePrefix = isToolNamePrefixFeatureEnabled(dockerCli)
101101

102+
// Check if use-embeddings feature is enabled
103+
options.UseEmbeddings = isUseEmbeddingsFeatureEnabled(dockerCli)
104+
102105
// Update catalog URL based on mcp-oauth-dcr flag if using default Docker catalog URL
103106
if len(options.CatalogPath) == 1 && (options.CatalogPath[0] == catalog.DockerCatalogURLV2 || options.CatalogPath[0] == catalog.DockerCatalogURLV3) {
104107
options.CatalogPath[0] = catalog.GetDockerCatalogURL(options.McpOAuthDcrEnabled)
@@ -375,3 +378,17 @@ func setLegacyDefaults(options *gateway.Config) {
375378
}
376379
}
377380
}
381+
382+
// isUseEmbeddingsFeatureEnabled reports whether the "use-embeddings" feature is
// switched on in the Docker CLI config file. The feature is opt-in: a nil config
// file, a nil Features map, or a missing key all yield false, and only the exact
// value "enabled" yields true.
383+
func isUseEmbeddingsFeatureEnabled(dockerCli command.Cli) bool {
384+
configFile := dockerCli.ConfigFile()
385+
if configFile == nil || configFile.Features == nil {
386+
return false
387+
}
388+
389+
value, exists := configFile.Features["use-embeddings"]
390+
if !exists {
391+
return false
392+
}
393+
return value == "enabled"
394+
}
Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
---
2+
marp: true
3+
theme: default
4+
paginate: true
5+
---
6+
7+
# Vector Embeddings for MCP Gateway
8+
9+
**Semantic Search for MCP Servers and Tools**
10+
11+
---
12+
13+
## Overview
14+
15+
Added AI-powered semantic search capabilities to the MCP Gateway using OpenAI embeddings and a SQLite vector database.
16+
17+
**Key Features:**
18+
- Vector similarity search for finding relevant MCP servers
19+
- Feature flag system for opt-in usage
20+
- Graceful fallback to keyword search
21+
22+
---
23+
24+
## Components
25+
26+
```mermaid
27+
flowchart TD
28+
A["MCP Gateway"] --> B["Embeddings Client"]
29+
B --> C["Docker Container"]
30+
C --> D["vector-db MCP Server"]
31+
D --> E["SQLite-vec extension"]
32+
E --> F["~/.docker/mcp/vectors.db"]
33+
A --> G["OpenAI API"]
34+
G --> H["text-embedding-3-small"]
35+
style A fill:#e1f5ff
36+
style B fill:#fff3e0
37+
style D fill:#f3e5f5
38+
```
39+
40+
---
41+
42+
## Feature Flag System
43+
44+
```bash
45+
# Enable embeddings feature
46+
docker mcp feature enable use-embeddings
47+
48+
# Set OpenAI API key
49+
export OPENAI_API_KEY="sk-..."
50+
```
51+
52+
**Configuration:**
53+
- Feature: `use-embeddings` (disabled by default)
54+
- Storage: `~/.docker/mcp/vectors.db`
55+
- Model: OpenAI `text-embedding-3-small` (1536 dimensions)
56+
57+
---
58+
59+
## How mcp-find Works with Embeddings
60+
61+
```mermaid
62+
sequenceDiagram
63+
participant User
64+
participant Gateway
65+
participant OpenAI
66+
participant VectorDB
67+
participant Catalog
68+
User->>Gateway: mcp-find "tools for github"
69+
alt Embeddings Enabled
70+
Gateway->>OpenAI: Generate embedding for query
71+
OpenAI-->>Gateway: [0.014, -0.018, ...]
72+
Gateway->>VectorDB: Search mcp-server-collection
73+
VectorDB-->>Gateway: Top 5 similar servers
74+
Gateway->>Catalog: Lookup server details
75+
Catalog-->>Gateway: Server metadata
76+
else Embeddings Disabled
77+
Gateway->>Catalog: Keyword search (title/description)
78+
Catalog-->>Gateway: Matching servers
79+
end
80+
Gateway-->>User: Server results
81+
```
82+
83+
---
84+
85+
## Implementation: VectorDBClient
86+
87+
```go
88+
type VectorDBClient struct {
89+
cmd *exec.Cmd
90+
client *mcp.Client
91+
session *mcp.ClientSession
92+
}
93+
94+
// Connects to Docker container running vector-db
95+
func NewVectorDBClient(ctx context.Context, dataDir string) (*VectorDBClient, error) {
96+
cmd := exec.CommandContext(ctx,
97+
"docker", "run", "-i", "--rm",
98+
"-v", fmt.Sprintf("%s:/data", dataDir),
99+
"-e", "DB_PATH=/data/vectors.db",
100+
"-e", "VECTOR_DIMENSION=1536",
101+
"jimclark106/vector-db:latest",
102+
)
103+
// ... MCP client setup with CommandTransport
104+
}
105+
```
106+
107+
---
108+
109+
## Key Operations
110+
111+
```go
112+
type SearchArgs struct {
113+
Vector []float64 `json:"vector"`
114+
CollectionName string `json:"collection_name,omitempty"`
115+
ExcludeCollections []string `json:"exclude_collections,omitempty"`
116+
Limit int `json:"limit,omitempty"`
117+
}
118+
119+
func (c *VectorDBClient) SearchVectors(
120+
ctx context.Context,
121+
vector []float64,
122+
options *SearchOptions,
123+
) ([]SearchResult, error)
124+
```
125+
**Collections:**
126+
- Tool embeddings: one vector collection per server
127+
- `mcp-server-collection`: MCP server metadata
128+
129+
---
130+
131+
## Updated Tools
132+
133+
### mcp-find Tool
134+
- **With Embeddings:** Semantic search against `mcp-server-collection`
135+
- **Without Embeddings:** Traditional keyword matching
136+
- Returns: Server name, description, secrets, config schema
137+
138+
### find-tools Tool
139+
- **With Embeddings:** AI-powered server recommendations
140+
- **Without Embeddings:** Error message with guidance
141+
- Input: Natural language task description
142+
- Output: Relevant servers from catalog
143+
144+
---
145+
146+
## Code Structure
147+
148+
```
149+
pkg/gateway/
150+
├── embeddings/
151+
│ └── client.go # VectorDBClient implementation
152+
├── findmcps.go # findServersByEmbedding()
153+
├── findtools.go # generateEmbedding() + find-tools tool
154+
└── dynamic_mcps.go # mcp-find tool + decodeArguments()
155+
156+
cmd/docker-mcp/commands/
157+
├── feature.go # use-embeddings feature flag
158+
└── gateway.go # Feature check + client init
159+
```
160+
161+
---
162+
163+
## Embeddings Client Lifecycle
164+
165+
```mermaid
166+
stateDiagram-v2
167+
[*] --> Checking: Gateway starts
168+
Checking --> Disabled: Feature flag off
169+
Checking --> Disabled: No OPENAI_API_KEY
170+
Checking --> Initializing: Feature enabled + API key set
171+
Initializing --> Running: Docker container started
172+
Initializing --> Disabled: Init failed (logged warning)
173+
Running --> SearchVectors: find-tools called
174+
Running --> AddVector: Store embeddings
175+
Running --> ListCollections: Manage collections
176+
Running --> [*]: Gateway stops
177+
Disabled --> [*]: Gateway stops
178+
```
179+
180+
---
181+
182+
## Data Flow: Semantic Search
183+
184+
```mermaid
185+
flowchart LR
186+
A[User Query] --> B[Generate Embedding]
187+
B --> C[OpenAI API]
188+
C --> D[1536-dim Vector]
189+
D --> E[Search VectorDB]
190+
E --> F[Top K Results]
191+
F --> G[Extract Server Names]
192+
G --> H[Lookup in Catalog]
193+
H --> I[Return Server Info]
194+
style A fill:#e3f2fd
195+
style D fill:#fff3e0
196+
style F fill:#f3e5f5
197+
style I fill:#e8f5e9
198+
```
199+
200+
---
201+
202+
## Benefits
203+
204+
✅ **Natural Language Search**
205+
- "tools for github" → github server
206+
- "database queries" → sqlite server
207+
208+
✅ **Better Discovery**
209+
- Semantic matching vs exact keywords
210+
- Handles synonyms and related concepts
211+
212+
✅ **Graceful Degradation**
213+
- Falls back to keyword search if unavailable
214+
- Gateway continues working without embeddings
215+
216+
✅ **Opt-in Feature**
217+
- Disabled by default
218+
- Requires explicit enablement + API key
219+
220+
---
221+
222+
## Testing & Validation
223+
224+
**Build:**
225+
```bash
226+
make docker-mcp
227+
make lint
228+
```
229+
230+
**Enable Feature:**
231+
```bash
232+
docker mcp feature enable use-embeddings
233+
export OPENAI_API_KEY="sk-..."
234+
docker mcp gateway run
235+
```
236+
237+
**Test Search:**
238+
```bash
239+
# Via Claude Desktop or other MCP client
240+
Find mcp tools that can help me ...
241+
```
242+
243+
---
244+
245+
## Implementation Details
246+
247+
**Files Created:**
248+
- `pkg/gateway/embeddings/client.go` (318 lines)
249+
- `pkg/gateway/findmcps.go` (83 lines)
250+
251+
**Files Modified:**
252+
- `pkg/gateway/dynamic_mcps.go` - Base64 decoding + mcp-find handler
253+
- `pkg/gateway/findtools.go` - Embedding generation
254+
- `pkg/gateway/run.go` - Client initialization
255+
- `pkg/gateway/config.go` - UseEmbeddings flag
256+
- `cmd/docker-mcp/commands/feature.go` - Feature registration
257+
- `cmd/docker-mcp/commands/gateway.go` - Feature check
258+
259+
---
260+
261+
## Future Enhancements
262+
263+
🔮 **Possible Improvements:**
264+
- Distribute embeddings with catalogs
265+
- Support other embedding models and providers
266+
- Automatic reindexing on catalog changes
267+
268+
---
269+
270+
## Summary
271+
272+
✨ **What We Built:**
273+
- sqlite-vec integration
274+
- LLM embeddings integration
275+
- Semantic search for MCP servers
276+
277+
🎯 **Impact:**
278+
- Better server/tool discovery
279+
- Natural language queries
280+
- Foundation for AI-powered gateway
281+
282+
---
283+
284+
## Questions?
285+
286+
**Documentation:**
287+
- Feature flags: `docker mcp feature ls`
288+
- sqlite-vec service image: `jimclark106/vector-db:latest`
289+
- Model: `text-embedding-3-small`
290+
291+
**Storage:**
292+
- `~/.docker/mcp/vectors.db`
293+
- `~/.docker/config.json` (feature flags)
294+
295+
**Code:**
296+
- Branch: `slim/embeddings`
297+
- Main files: `pkg/gateway/embeddings/`, `findmcps.go`

0 commit comments

Comments
 (0)