filchy · filchy · Mar 18, 2026 · Mar 18, 2026
diff --git a/docs/api-reference.md b/docs/api-reference.md
@@ -401,6 +401,7 @@ class MyMiddleware(TinyBaseMiddleware):
 
 ```python
 from tinygent.core.datamodels.messages import (
+    TinyUserMessage,
     TinyHumanMessage,
     TinyChatMessage,
     TinySystemMessage,

diff --git a/docs/concepts/memory.md b/docs/concepts/memory.md
@@ -286,9 +286,10 @@ Tinygent supports multiple message types:
 
 ```python
 from tinygent.core.datamodels.messages import (
-    TinyHumanMessage,      # User messages
+    TinyHumanMessage,      # Human messages
     TinyChatMessage,       # AI responses
     TinySystemMessage,     # System prompts
+    TinyUserMessage,       # User prompts
     TinyPlanMessage,       # Planning messages
     TinyToolMessage,       # Tool results
 )

diff --git a/docs/concepts/middleware.md b/docs/concepts/middleware.md
@@ -801,6 +801,126 @@ agent = TinyMultiStepAgent(
 
 ---
 
+### TinyVectorToolSelectorMiddleware
+
+Selects the most relevant tools for each LLM call using semantic similarity between the user query and tool descriptions. No secondary LLM call required — selection is done purely via vector embeddings and cosine similarity.
+
+**Features:**
+- Uses an embedder to compute cosine similarity between the query and each tool description
+- Ranks tools by similarity and selects the top candidates
+- Supports an always-include list for critical tools
+- Configurable maximum tool count and minimum similarity threshold
+- Customizable query and tool transform functions for fine-grained embedding control
+
+**How It Works:**
+1. Before each LLM call, the last `TinyHumanMessage` is embedded as the query
+2. Each tool's name and description are embedded
+3. Cosine similarity is computed between the query and every tool embedding
+4. Tools are ranked by similarity; only those above `similarity_threshold` (up to `max_tools`) are passed to the main agent
+
+**Basic Usage:**
+
+```python
+from tinygent.agents.middleware import TinyVectorToolSelectorMiddleware
+from tinygent.agents import TinyMultiStepAgent
+from tinygent.core.factory import build_embedder, build_llm
+
+selector = TinyVectorToolSelectorMiddleware(
+    embedder=build_embedder('openai:text-embedding-3-small'),
+    similarity_threshold=0.5,
+    max_tools=5,
+)
+
+agent = TinyMultiStepAgent(
+    llm=build_llm('openai:gpt-4o'),
+    tools=[search, calculator, weather, database, email, calendar, notes],
+    middleware=[selector],
+)
+```
+
+**Always Include Critical Tools:**
+
+```python
+selector = TinyVectorToolSelectorMiddleware(
+    embedder=build_embedder('openai:text-embedding-3-small'),
+    similarity_threshold=0.4,
+    max_tools=5,
+    always_include=[search],
+)
+```
+
+**Custom Transform Functions:**
+
+```python
+from tinygent.core.datamodels.tool import AbstractTool
+from tinygent.core.types.io.llm_io_input import TinyLLMInput
+
+def query_transform(llm_input: TinyLLMInput) -> str:
+    # Embed the last 3 messages combined for richer context
+    recent = llm_input.messages[-3:]
+    return ' '.join(m.content for m in recent if hasattr(m, 'content'))
+
+def tool_transform(tool: AbstractTool) -> str:
+    # Repeat name to increase its weight in the embedding
+    return f'{tool.info.name} {tool.info.name}: {tool.info.description}'
+
+selector = TinyVectorToolSelectorMiddleware(
+    embedder=build_embedder('openai:text-embedding-3-small'),
+    similarity_threshold=0.45,
+    max_tools=4,
+    query_transform_fn=query_transform,
+    tool_transform_fn=tool_transform,
+)
+```
+
+**Using Config Factory:**
+
+Transform functions and the similarity threshold can also be set directly on the config:
+
+```python
+from tinygent.agents.middleware import TinyVectorToolSelectorMiddlewareConfig
+
+config = TinyVectorToolSelectorMiddlewareConfig(
+    embedder='openai:text-embedding-3-small',
+    similarity_threshold=0.5,
+    max_tools=5,
+    always_include=['search'],
+    query_transform_fn=query_transform,
+    tool_transform_fn=tool_transform,
+)
+
+selector = config.build()
+```
+
+**Factory Configuration Options:**
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `type` | `Literal['vector_tool_classifier']` | `'vector_tool_classifier'` | Type identifier (frozen) |
+| `embedder` | `AbstractEmbedderConfig \| AbstractEmbedder` | Required | Embedder used to compute similarity. Can be a string like `'openai:text-embedding-3-small'` or an embedder instance |
+| `similarity_threshold` | `float \| None` | `None` | Minimum cosine similarity score for a tool to be selected. `None` = no threshold |
+| `max_tools` | `int \| None` | `None` | Maximum number of tools to select. `None` = no limit |
+| `always_include` | `list[str] \| None` | `None` | List of tool names to always include regardless of similarity score |
+| `query_transform_fn` | `Callable[[TinyLLMInput], str] \| None` | `None` | Custom function to extract the query string from the LLM input. Defaults to the last `TinyHumanMessage` found |
+| `tool_transform_fn` | `Callable[[AbstractTool], str] \| None` | `None` | Custom function to produce the text embedded for each tool. Defaults to `"name - description"` |
+
+**LLM vs. Vector Tool Selector:**
+
+| | `TinyLLMToolSelectorMiddleware` | `TinyVectorToolSelectorMiddleware` |
+|---|---|---|
+| Selection method | Secondary LLM call | Cosine similarity |
+| Extra API cost | Yes (LLM tokens) | Yes (embeddings, cheaper) |
+| Latency | Higher | Lower |
+| Accuracy | Higher (understands context) | Good (semantic similarity) |
+| Custom logic | Via prompt template | Via transform functions |
+
+**When to Use:**
+- You have 10+ tools and want lower latency/cost than the LLM selector
+- Tool descriptions are semantically distinct
+- You want deterministic, reproducible selection behavior
+
+---
+
 ## Next Steps
 
 - **[Agents](agents.md)**: Use middleware with agents

diff --git a/docs/examples.md b/docs/examples.md
@@ -323,6 +323,66 @@ Three custom middleware examples:
 uv run examples/agents/middleware/main.py
 ```
 
+---
+
+#### 6. LLM Tool Selector Middleware
+
+**Location**: `examples/agents/middleware/llm_tool_selector_example.py`
+
+Demonstrates intelligent tool selection using a secondary LLM before each agent call.
+
+**Run:**
+
+```bash
+uv run examples/agents/middleware/llm_tool_selector_example.py
+```
+
+---
+
+#### 7. Vector Tool Selector Middleware
+
+**Location**: `examples/agents/middleware/vector_tool_selector_example.py`
+
+Demonstrates tool selection using semantic similarity (embeddings + cosine similarity) — no secondary LLM call needed.
+
+**Run:**
+
+```bash
+uv run examples/agents/middleware/vector_tool_selector_example.py
+```
+
+**Highlights:**
+
+```python
+from tinygent.agents.middleware import TinyVectorToolSelectorMiddlewareConfig
+from tinygent.core.factory import build_embedder
+
+# Basic: embed query, rank tools by cosine similarity
+selector = TinyVectorToolSelectorMiddlewareConfig(
+    embedder=build_embedder('openai:text-embedding-3-small'),
+    max_tools=4,
+)
+
+# Always include a critical tool regardless of similarity
+selector = TinyVectorToolSelectorMiddlewareConfig(
+    embedder=build_embedder('openai:text-embedding-3-small'),
+    max_tools=4,
+    always_include=[greet],
+)
+
+# Custom transform functions for fine-grained embedding control
+from tinygent.agents.middleware.vector_tool_selector import TinyVectorToolSelectorMiddleware
+
+selector = TinyVectorToolSelectorMiddleware(
+    embedder=build_embedder('openai:text-embedding-3-small'),
+    max_tools=4,
+    query_transform_fn=lambda llm_input: ' '.join(
+        m.content for m in llm_input.messages[-3:] if hasattr(m, 'content')
+    ),
+    tool_transform_fn=lambda tool: f'{tool.info.name}: {tool.info.description}',
+)
+```
+
 **Highlights:**
 
 ```python

diff --git a/examples/agents/middleware/README.md b/examples/agents/middleware/README.md
@@ -8,6 +8,8 @@ This example demonstrates how to use **middleware** in TinyGent agents. Middlewa
 uv sync --extra openai
 
 uv run examples/agents/middleware/main.py
+uv run examples/agents/middleware/llm_tool_selector_example.py
+uv run examples/agents/middleware/vector_tool_selector_example.py
 ```
 
 ## Concept

diff --git a/examples/agents/middleware/llm_tool_selector_example.py b/examples/agents/middleware/llm_tool_selector_example.py
@@ -108,7 +108,9 @@ def example_1_basic_selection() -> None:
         middleware=[selector],
     )
 
-    result = agent.run('Greet Alice and then add 5 and 7')
+    result = agent.run(
+        'Greet Alice and then tell her what is weather like in San Francisco'
+    )
     print(f'Result: {result}\n')
 
 
@@ -151,7 +153,7 @@ def example_3_always_include() -> None:
     selector = build_middleware(
         'llm_tool_selector',
         llm=build_llm('openai:gpt-4o-mini'),
-        always_include=['greet'],
+        always_include=[greet],
     )
 
     agent = build_agent(
@@ -182,7 +184,7 @@ def example_4_combined_constraints() -> None:
     selector = TinyLLMToolSelectorMiddlewareConfig(
         llm=build_llm('openai:gpt-4o-mini'),
         max_tools=4,
-        always_include=['greet'],
+        always_include=[greet],
     )
 
     agent = build_agent(