Skip to content
2 changes: 1 addition & 1 deletion agents/analyst/agent.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ api_key_env = "GROQ_API_KEY"
max_llm_tokens_per_hour = 150000

[capabilities]
tools = ["file_read", "file_write", "file_list", "shell_exec", "web_search", "web_fetch", "memory_store", "memory_recall"]
tools = ["file_read", "file_write", "file_list", "shell_exec", "web_search", "list_searxng_categories", "web_fetch", "memory_store", "memory_recall"]
network = ["*"]
memory_read = ["*"]
memory_write = ["self.*", "shared.*"]
Expand Down
2 changes: 1 addition & 1 deletion agents/coder/agent.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ max_llm_tokens_per_hour = 200000
max_concurrent_tools = 10

[capabilities]
tools = ["file_read", "file_write", "file_list", "shell_exec", "web_search", "web_fetch", "memory_store", "memory_recall"]
tools = ["file_read", "file_write", "file_list", "shell_exec", "web_search", "list_searxng_categories", "web_fetch", "memory_store", "memory_recall"]
network = ["*"]
memory_read = ["*"]
memory_write = ["self.*"]
Expand Down
2 changes: 1 addition & 1 deletion agents/data-scientist/agent.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ api_key_env = "GROQ_API_KEY"
max_llm_tokens_per_hour = 150000

[capabilities]
tools = ["file_read", "file_write", "file_list", "shell_exec", "web_search", "web_fetch", "memory_store", "memory_recall"]
tools = ["file_read", "file_write", "file_list", "shell_exec", "web_search", "list_searxng_categories", "web_fetch", "memory_store", "memory_recall"]
network = ["*"]
memory_read = ["*"]
memory_write = ["self.*", "shared.*"]
Expand Down
2 changes: 1 addition & 1 deletion agents/debugger/agent.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ api_key_env = "GROQ_API_KEY"
max_llm_tokens_per_hour = 150000

[capabilities]
tools = ["file_read", "file_write", "file_list", "shell_exec", "web_search", "web_fetch", "memory_store", "memory_recall"]
tools = ["file_read", "file_write", "file_list", "shell_exec", "web_search", "list_searxng_categories", "web_fetch", "memory_store", "memory_recall"]
network = ["*"]
memory_read = ["*"]
memory_write = ["self.*", "shared.*"]
Expand Down
2 changes: 1 addition & 1 deletion agents/hello-world/agent.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Keep responses brief (2-4 paragraphs max) unless the user asks for detail."""
max_llm_tokens_per_hour = 100000

[capabilities]
tools = ["file_read", "file_list", "web_fetch", "web_search", "memory_store", "memory_recall"]
tools = ["file_read", "file_list", "web_fetch", "web_search", "list_searxng_categories", "memory_store", "memory_recall"]
network = ["*"]
memory_read = ["*"]
memory_write = ["self.*"]
Expand Down
2 changes: 1 addition & 1 deletion agents/researcher/agent.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ api_key_env = "GROQ_API_KEY"
max_llm_tokens_per_hour = 150000

[capabilities]
tools = ["web_search", "web_fetch", "file_read", "file_write", "file_list", "memory_store", "memory_recall"]
tools = ["web_search", "list_searxng_categories", "web_fetch", "file_read", "file_write", "file_list", "memory_store", "memory_recall"]
network = ["*"]
memory_read = ["*"]
memory_write = ["self.*", "shared.*"]
2 changes: 1 addition & 1 deletion agents/travel-planner/agent.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ max_llm_tokens_per_hour = 150000
max_concurrent_tools = 5

[capabilities]
tools = ["file_read", "file_write", "file_list", "memory_store", "memory_recall", "web_search", "web_fetch", "browser_navigate", "browser_click", "browser_type", "browser_read_page", "browser_screenshot", "browser_close"]
tools = ["file_read", "file_write", "file_list", "memory_store", "memory_recall", "web_search", "list_searxng_categories", "web_fetch", "browser_navigate", "browser_click", "browser_type", "browser_read_page", "browser_screenshot", "browser_close"]
network = ["*"]
memory_read = ["*"]
memory_write = ["self.*", "shared.*"]
2 changes: 1 addition & 1 deletion agents/writer/agent.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ api_key_env = "GEMINI_API_KEY"
max_llm_tokens_per_hour = 100000

[capabilities]
tools = ["file_read", "file_write", "file_list", "web_search", "web_fetch", "memory_store", "memory_recall"]
tools = ["file_read", "file_write", "file_list", "web_search", "list_searxng_categories", "web_fetch", "memory_store", "memory_recall"]
network = ["*"]
memory_read = ["*"]
memory_write = ["self.*"]
13 changes: 6 additions & 7 deletions crates/openfang-kernel/src/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -881,12 +881,12 @@ impl OpenFangKernel {
// Auto-detect embedding provider by checking API key env vars in
// priority order. First match wins.
const API_KEY_PROVIDERS: &[(&str, &str)] = &[
("OPENAI_API_KEY", "openai"),
("GROQ_API_KEY", "groq"),
("MISTRAL_API_KEY", "mistral"),
("TOGETHER_API_KEY", "together"),
("OPENAI_API_KEY", "openai"),
("GROQ_API_KEY", "groq"),
("MISTRAL_API_KEY", "mistral"),
("TOGETHER_API_KEY", "together"),
("FIREWORKS_API_KEY", "fireworks"),
("COHERE_API_KEY", "cohere"),
("COHERE_API_KEY", "cohere"),
];

let detected_from_key = API_KEY_PROVIDERS
Expand Down Expand Up @@ -1127,8 +1127,7 @@ impl OpenFangKernel {
!= entry.manifest.tool_allowlist
|| disk_manifest.tool_blocklist
!= entry.manifest.tool_blocklist
|| disk_manifest.skills
!= entry.manifest.skills
|| disk_manifest.skills != entry.manifest.skills
|| disk_manifest.mcp_servers
!= entry.manifest.mcp_servers;
if changed {
Expand Down
23 changes: 22 additions & 1 deletion crates/openfang-runtime/src/tool_runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,18 @@ pub async fn execute_tool(
tool_web_search_legacy(input).await
}
}
// SearXNG category discovery: asks the configured search engine which
// categories it offers and returns them as a JSON string.
"list_searxng_categories" => {
    match web_ctx {
        // No web context was supplied, so there is nothing to query.
        None => Err("SearXNG is not configured".to_string()),
        Some(ctx) => ctx
            .search
            .list_searxng_categories()
            .await
            // Errors from the lookup pass through unchanged; a failed
            // serialization degrades to an empty JSON array.
            .map(|cats| serde_json::to_string(&cats).unwrap_or_else(|_| "[]".to_string())),
    }
}

// Shell tool — metacharacter check + exec policy + taint check
"shell_exec" => {
Expand Down Expand Up @@ -620,7 +632,7 @@ pub fn builtin_tool_definitions() -> Vec<ToolDefinition> {
},
ToolDefinition {
name: "web_search".to_string(),
description: "Search the web using multiple providers (Tavily, Brave, Perplexity, DuckDuckGo) with automatic fallback. Returns structured results with titles, URLs, and snippets.".to_string(),
description: "Search the web using multiple providers (Tavily, Brave, Perplexity, Searxng, DuckDuckGo) with automatic fallback. Returns structured results with titles, URLs, and snippets.".to_string(),
input_schema: serde_json::json!({
"type": "object",
"properties": {
Expand All @@ -630,6 +642,15 @@ pub fn builtin_tool_definitions() -> Vec<ToolDefinition> {
"required": ["query"]
}),
},
// --- SearXNG category listing tool ---
// Takes no arguments: the input schema is an object with no properties
// and no required fields.
ToolDefinition {
name: "list_searxng_categories".to_string(),
description: "List available search categories from the SearXNG instance. Returns the list of categories the instance supports (e.g., 'general', 'images', 'news', 'videos'). Only works when SearXNG is configured as the search provider.".to_string(),
input_schema: serde_json::json!({
"type": "object",
"properties": {},
"required": []
}),
},
// --- Shell tool ---
ToolDefinition {
name: "shell_exec".to_string(),
Expand Down
6 changes: 5 additions & 1 deletion crates/openfang-runtime/src/web_fetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,11 @@ mod tests {
assert!(check_ssrf("http://169.254.169.254/latest/meta-data/", &allow).is_err());
// Also verify hostname-based metadata blocks
let allow2 = vec!["metadata.google.internal".to_string()];
assert!(check_ssrf("http://metadata.google.internal/computeMetadata/v1/", &allow2).is_err());
assert!(check_ssrf(
"http://metadata.google.internal/computeMetadata/v1/",
&allow2
)
.is_err());
}

#[test]
Expand Down
52 changes: 17 additions & 35 deletions crates/openfang-runtime/src/web_search.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Multi-provider web search engine with auto-fallback.
//!
//! Supports 4 providers: Tavily (AI-agent-native), Brave, Perplexity, and
//! DuckDuckGo (zero-config fallback). Auto mode cascades through available
//! Supports 5 providers: Tavily (AI-agent-native), Brave, Perplexity,
//! SearXNG (self-hosted), and DuckDuckGo (zero-config fallback). Auto mode
//! cascades through the available providers based on configured API keys
//! (or, for SearXNG, a configured instance URL).
//!
//! All API keys use `Zeroizing<String>` via `resolve_api_key()` to auto-wipe
Expand Down Expand Up @@ -55,7 +55,7 @@ impl WebSearchEngine {
SearchProvider::Tavily => self.search_tavily(query, max_results).await,
SearchProvider::Perplexity => self.search_perplexity(query).await,
SearchProvider::DuckDuckGo => self.search_duckduckgo(query, max_results).await,
SearchProvider::Searxng => self.search_searxng(query, max_results, None, 1).await,
SearchProvider::Searxng => self.search_searxng(query, max_results).await,
SearchProvider::Auto => self.search_auto(query, max_results).await,
};

Expand Down Expand Up @@ -100,7 +100,7 @@ impl WebSearchEngine {
// Searxng fourth (self-hosted, no API key needed)
if !self.config.searxng.url.is_empty() {
debug!("Auto: trying Searxng");
match self.search_searxng(query, max_results, None, 1).await {
match self.search_searxng(query, max_results).await {
Ok(result) => return Ok(result),
Err(e) => warn!("Searxng failed, falling back: {e}"),
}
Expand Down Expand Up @@ -325,46 +325,25 @@ impl WebSearchEngine {
}

/// Search via SearXNG self-hosted instance.
async fn search_searxng(
&self,
query: &str,
max_results: usize,
category: Option<&str>,
page: u32,
) -> Result<String, String> {
///
/// Uses the `!category` syntax embedded in the query string (e.g., `!news rust latest`).
/// Without a category prefix, SearXNG defaults to `general` search.
async fn search_searxng(&self, query: &str, max_results: usize) -> Result<String, String> {
if self.config.searxng.url.is_empty() {
return Err("SearXNG URL is not configured".to_string());
}

let category = category.unwrap_or("general");

// Validate category against SearXNG instance
match self.list_searxng_categories().await {
Ok(cats) => {
if !cats.iter().any(|c| c == category) {
return Err(format!(
"Invalid SearXNG category '{}'. Available: {}",
category,
cats.join(", ")
));
}
}
Err(e) => warn!("Could not validate SearXNG category: {e}"),
}

let limit = max_results;

debug!(query, "Searching via SearXNG");

let resp = self
.client
.get(format!("{}/search", self.config.searxng.url.trim_end_matches('/')))
.query(&[
("q", query),
("format", "json"),
("categories", category),
("page", &page.to_string()),
])
.get(format!(
"{}/search",
self.config.searxng.url.trim_end_matches('/')
))
.query(&[("q", query), ("format", "json")])
.header("User-Agent", "Mozilla/5.0 (compatible; OpenFangAgent/0.1)")
.send()
.await
Expand Down Expand Up @@ -451,7 +430,10 @@ impl WebSearchEngine {

let resp = self
.client
.get(format!("{}/config", self.config.searxng.url.trim_end_matches('/')))
.get(format!(
"{}/config",
self.config.searxng.url.trim_end_matches('/')
))
.header("User-Agent", "Mozilla/5.0 (compatible; OpenFangAgent/0.1)")
.send()
.await
Expand Down
70 changes: 0 additions & 70 deletions crates/openfang-skills/bundled/searxng/SKILL.md

This file was deleted.

49 changes: 31 additions & 18 deletions crates/openfang-skills/bundled/web-search/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,44 @@ You are a research specialist. You help users find accurate, up-to-date informat

## Key Principles

- Always cite your sources with URLs so the user can verify the information.
- Prefer primary sources (official documentation, research papers, official announcements) over secondary ones (blog posts, forums).
- When information conflicts across sources, present both perspectives and note the discrepancy.
- Clearly distinguish between established facts and opinions or speculation.
- State the date of information when recency matters (e.g., pricing, API versions, compatibility).
- Cite sources with URLs so users can verify.
- Prefer primary sources (official docs, research papers) over secondary (blogs, forums).
- When sources conflict, present both perspectives and note the discrepancy.
- Distinguish established facts from opinions or speculation.
- State the date when recency matters (pricing, API versions).

## Search Techniques

- Start with specific, targeted queries. Use exact phrases in quotes for precise matches.
- Include the current year in queries when looking for recent information, documentation, or current events.
- Use site-specific searches (e.g., `site:docs.python.org`) when you know the authoritative source.
- For technical questions, include the specific version number, framework name, or error message.
- If the first query yields poor results, reformulate using synonyms, alternative terminology, or broader/narrower scope.
- Start with specific queries. Use exact phrases in quotes.
- Include the current year for recent info or docs.
- Use site-specific searches (e.g., `site:docs.python.org`) when you know the authoritative source.
- For technical questions, include version numbers or error messages.
- If results are poor, reformulate using synonyms or broader/narrower scope.

## SearXNG Search

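When SearXNG is configured, `web_search` can use it automatically, either as the selected provider or as part of the auto-fallback chain. Call the `list_searxng_categories` tool to confirm the instance is reachable and to see which categories it supports.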

### Search Syntax

SearXNG supports the following query prefixes:

- **`!` prefix** — Select engine or category: `!wp paris`, `!images Wau Holland`
- **`:` prefix** — Select language: `:fr !wp Wau Holland`

Prefixes can be combined in a single query, as in the `:fr !wp Wau Holland` example above. See the SearXNG preferences page for the full list.

## Synthesizing Results

- Lead with the direct answer, then provide supporting context.
- Organize findings by relevance, not by the order you found them.
- Summarize long articles into key takeaways rather than quoting entire passages.
- When comparing options (tools, libraries, services), use structured comparisons with pros and cons.
- Flag information that may be outdated or from unreliable sources.
- Organize findings by relevance, not by discovery order.
- Summarize long articles into key takeaways.
- Use pros/cons when comparing options.
- Flag outdated or unreliable information.

## Pitfalls to Avoid

- Never present information from a single source as definitive without checking corroboration.
- Do not include URLs you have not verified — broken links erode trust.
- Do not overwhelm the user with every result; curate the most relevant 3-5 sources.
- Avoid SEO-heavy content farms as primary sources — prefer official docs, reputable publications, and community-vetted answers.
- Never present single-source info as definitive without corroboration.
- Do not include unverified URLs — broken links erode trust.
- Do not overwhelm users; curate the most relevant 3-5 sources.
- Avoid SEO-heavy content farms — prefer official docs and community-vetted answers.
Loading
Loading