From ce4df37f91ba5fc6dc1754c7cc4f0326218834db Mon Sep 17 00:00:00 2001 From: Mouse Date: Mon, 27 Apr 2026 12:10:18 -0700 Subject: [PATCH] feat: add you.com search provider --- README.md | 16 ++-- config.example.toml | 1 + src/cli.rs | 5 +- src/config.rs | 5 ++ src/engine.rs | 4 +- src/main.rs | 4 +- src/providers/mod.rs | 2 + src/providers/you.rs | 178 +++++++++++++++++++++++++++++++++++++++++++ src/types.rs | 4 +- tests/integration.rs | 4 +- 10 files changed, 210 insertions(+), 13 deletions(-) create mode 100644 src/providers/you.rs diff --git a/README.md b/README.md index c2a8498..c877660 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # Search CLI -**One binary, 11 providers, 14 modes. The web search tool your AI agent is missing.** +**One binary, 12 providers, 14 modes. The web search tool your AI agent is missing.**
@@ -19,7 +19,7 @@ --- -A single Rust binary that aggregates Brave, Serper, Exa, Jina, Firecrawl, Tavily, SerpApi, Perplexity, xAI, and more into one unified search interface. Designed from day one for AI agents -- structured JSON output, semantic exit codes, auto-JSON when piped, and parallel fan-out across providers in under 2 seconds. +A single Rust binary that aggregates Brave, Serper, Exa, Jina, Firecrawl, Tavily, SerpApi, Perplexity, xAI, You.com, and more into one unified search interface. Designed from day one for AI agents -- structured JSON output, semantic exit codes, auto-JSON when piped, and parallel fan-out across providers in under 2 seconds. [Install](#install) | [How It Works](#how-it-works) | [Features](#features) | [Providers](#providers) | [Contributing](#contributing) @@ -27,7 +27,7 @@ A single Rust binary that aggregates Brave, Serper, Exa, Jina, Firecrawl, Tavily ## Why This Exists -Every search API is good at something different. Brave has its own 35-billion page index. Serper gives you raw Google results plus Scholar, Patents, and Places. Exa does neural/semantic search. Perplexity gives AI-synthesized answers with citations. Jina reads any URL into clean markdown. Firecrawl renders JavaScript-heavy pages. xAI searches X/Twitter. +Every search API is good at something different. Brave has its own 35-billion page index. Serper gives you raw Google results plus Scholar, Patents, and Places. Exa does neural/semantic search. Perplexity gives AI-synthesized answers with citations. Jina reads any URL into clean markdown. Firecrawl renders JavaScript-heavy pages. xAI searches X/Twitter. You.com offers web and news search with 100 free searches/day without an API key. You shouldn't have to wire up each one separately, handle their different response formats, manage rate limits, or figure out which provider to use for which query type. 
`search` does all of that for you -- routes your query to the right combination automatically, fans out in parallel, deduplicates results, and gives you a single clean response. @@ -125,8 +125,8 @@ search "your query here" | Mode | What it does | Providers used | |------|-------------|----------------| | `auto` | Detects intent from your query | *varies* | -| `general` | Broad web search | Brave + Serper + Exa + Jina + Tavily + Perplexity | -| `news` | Breaking news, current events | Brave News + Serper News + Tavily + Perplexity | +| `general` | Broad web search | Brave + Serper + Exa + Jina + Tavily + Perplexity + You.com | +| `news` | Breaking news, current events | Brave News + Serper News + Tavily + Perplexity + You.com | | `academic` | Research papers, studies | Exa + Serper + Tavily + Perplexity | | `people` | LinkedIn profiles, bios | Exa | | `deep` | Maximum coverage | Brave (LLM Context) + Exa + Serper + Tavily + Perplexity + xAI | @@ -213,6 +213,7 @@ search "query" 2>/dev/null # suppress diagnostics | **[Tavily](https://tavily.com/)** | General + deep search, research-focused | Broad coverage, research queries | | **[SerpApi](https://serpapi.com/)** | 80+ engines: Google, Bing, YouTube, Baidu | Scholar, multi-engine coverage | | **[Perplexity](https://perplexity.ai/)** | AI-powered answers with citations (Sonar Pro) | Complex queries, synthesized answers | +| **[You.com](https://you.com/platform)** | Web + news search API (`/v1/agents/search`) | General web search and news fallback | | **Browserless** | Cloud browser for Cloudflare/JS-heavy pages | Anti-bot bypass, dynamic rendering | | **Stealth** | Built-in anti-bot scraper | Protected pages, no API key needed | | **[xAI](https://x.ai/)** | X/Twitter search via Grok AI | Tweets, trending topics, social sentiment | @@ -240,6 +241,11 @@ export SEARCH_KEYS_SERPAPI=your-key export SEARCH_KEYS_PERPLEXITY=your-key export SEARCH_KEYS_BROWSERLESS=your-key export SEARCH_KEYS_XAI=your-key +export 
SEARCH_KEYS_YOU=your-key + +# You.com aliases also supported +export YDC_API_KEY=your-key +export YOU_API_KEY=your-key ``` ## Updating diff --git a/config.example.toml b/config.example.toml index 4c09eeb..01ddc70 100644 --- a/config.example.toml +++ b/config.example.toml @@ -14,6 +14,7 @@ serpapi = "" perplexity = "" browserless = "" xai = "" +you = "" [settings] timeout = 10 diff --git a/src/cli.rs b/src/cli.rs index 08ecaa2..b8872c6 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -6,7 +6,7 @@ use clap::{Parser, Subcommand}; name = "search", version, about = "Agent-friendly multi-provider search CLI", - long_about = "Aggregates 11 search providers with 14 search modes.\n\ + long_about = "Aggregates 12 search providers with 14 search modes.\n\ Auto-detects intent from your query and routes to the best providers.\n\ Outputs colored tables for humans, JSON when piped to other tools.\n\n\ PROVIDERS:\n \ @@ -21,6 +21,7 @@ use clap::{Parser, Subcommand}; browserless Cloud browser for Cloudflare/JS-heavy pages\n \ stealth Anti-bot stealth scraper\n \ - xai X/Twitter social search via xAI Grok\n\n\ + xai X/Twitter social search via xAI Grok\n \ + you You.com web and news search (YDC API)\n\n\ EXAMPLES:\n \ search \"rust error handling\" # auto-detect mode\n \ search search -q \"CRISPR\" -m academic # academic papers\n \ @@ -115,7 +116,7 @@ pub struct SearchArgs { #[arg(short, long)] pub count: Option, - /// Use only specific providers (comma-separated: brave,serper,exa,jina,firecrawl,tavily,serpapi,perplexity,browserless,stealth,xai) + /// Use only specific providers (comma-separated: parallel,brave,serper,exa,jina,firecrawl,tavily,serpapi,perplexity,browserless,stealth,xai,you) #[arg(short, long, value_delimiter = ',')] pub providers: Option>, diff --git a/src/config.rs b/src/config.rs index 2cc9ee9..8b0f583 100644 --- a/src/config.rs +++ b/src/config.rs @@ -36,6 +36,8 @@ pub struct ApiKeys { pub browserless: String, #[serde(default)] pub xai: String, + #[serde(default)] + pub you: String, } #[derive(Debug, Clone,
Serialize, Deserialize)] @@ -68,6 +70,7 @@ impl Default for AppConfig { perplexity: String::new(), browserless: String::new(), xai: String::new(), + you: String::new(), }, settings: Settings { timeout: default_timeout(), @@ -140,6 +143,7 @@ pub fn config_show(config: &AppConfig) { ("perplexity", &config.keys.perplexity, "PERPLEXITY_API_KEY"), ("browserless",&config.keys.browserless, "BROWSERLESS_API_KEY"), ("xai", &config.keys.xai, "XAI_API_KEY"), + ("you", &config.keys.you, "YDC_API_KEY"), ]; if c { println!(" {}", "[keys]".bold()); } else { println!("[keys]"); } @@ -228,6 +232,7 @@ pub fn config_check(config: &AppConfig) { ("perplexity", &config.keys.perplexity, "PERPLEXITY_API_KEY", "AI-powered answers with citations (Perplexity Sonar)"), ("browserless", &config.keys.browserless, "BROWSERLESS_API_KEY", "Cloud browser for Cloudflare/JS-heavy pages"), ("xai", &config.keys.xai, "XAI_API_KEY", "X/Twitter social search via xAI Grok"), + ("you", &config.keys.you, "YDC_API_KEY", "You.com web/news search (100 free searches/day without key)"), ]; if c { diff --git a/src/engine.rs b/src/engine.rs index 4f0a097..29bff14 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -12,8 +12,8 @@ use tokio::time::timeout; /// Which providers to query for each mode fn providers_for_mode(mode: Mode) -> &'static [&'static str] { match mode { - Mode::Auto | Mode::General => &["parallel", "brave", "serper", "exa", "jina", "tavily", "perplexity"], - Mode::News => &["parallel", "brave", "serper", "tavily", "perplexity"], + Mode::Auto | Mode::General => &["parallel", "brave", "serper", "exa", "jina", "tavily", "perplexity", "you"], + Mode::News => &["parallel", "brave", "serper", "tavily", "perplexity", "you"], Mode::Academic => &["exa", "serper", "tavily", "perplexity"], Mode::Deep => &["parallel", "brave", "exa", "serper", "tavily", "perplexity", "xai"], Mode::Scholar => &["serper", "serpapi"], diff --git a/src/main.rs b/src/main.rs index 36547ac..b028bc5 100644 --- a/src/main.rs +++ 
b/src/main.rs @@ -49,6 +49,7 @@ async fn main() { "api.jina.ai:443", "api.tavily.com:443", "api.perplexity.ai:443", + "api.you.com:443", ]; for domain in domains { let _ = lookup_host(domain).await; @@ -136,6 +137,7 @@ async fn main() { "https://api.search.brave.com/res/v1/web/search", "https://google.serper.dev/search", "https://api.exa.ai/search", + "https://api.you.com/v1/agents/search", ]; for url in urls { let _ = app_c.client.head(url).send().await; @@ -445,7 +447,7 @@ async fn run(cli: Cli, ctx: &Ctx, app: Arc) -> Result) -> Vec> { Box::new(perplexity::Perplexity::new(ctx.clone())), Box::new(serpapi::SerpApi::new(ctx.clone())), Box::new(xai::Xai::new(ctx.clone())), + Box::new(you::You::new(ctx.clone())), ] } diff --git a/src/providers/you.rs b/src/providers/you.rs new file mode 100644 index 0000000..89acbc5 --- /dev/null +++ b/src/providers/you.rs @@ -0,0 +1,178 @@ +use crate::context::AppContext; +use crate::errors::SearchError; +use crate::types::{SearchOpts, SearchResult}; +use async_trait::async_trait; +use std::sync::Arc; +use std::time::Duration; + +pub struct You { + ctx: Arc, +} + +impl You { + pub fn new(ctx: Arc) -> Self { + Self { ctx } + } + + fn api_key(&self) -> String { + let from_config = self.ctx.config.keys.you.trim().to_string(); + if !from_config.is_empty() { + return from_config; + } + ["YDC_API_KEY", "YOU_API_KEY", "SEARCH_KEYS_YOU"] // first non-blank alias wins + .iter() + .find_map(|k| std::env::var(k).ok().filter(|v| !v.trim().is_empty())) + .map_or_else(String::new, |v| v.trim().to_string()) + } + + async fn query_endpoint(&self, query: &str, count: usize, opts: &SearchOpts) -> Result { + let mut req = self + .ctx + .client + .get("https://api.you.com/v1/agents/search") + .query(&[("query", query), ("count", &count.to_string())]); + + if let Some(freshness) = &opts.freshness { + req = req.query(&[("freshness", freshness)]); + } + + if !opts.include_domains.is_empty() { + req = req.query(&[("include_domains", opts.include_domains.join(","))]); + } + if !opts.exclude_domains.is_empty() { + req =
req.query(&[("exclude_domains", opts.exclude_domains.join(","))]); + } + + let api_key = self.api_key(); + if !api_key.is_empty() { + req = req.header("X-API-Key", api_key); + } + + super::retry_request(|| { + let req = req.try_clone().ok_or_else(|| SearchError::Api { + provider: "you", + code: "request_clone_failed", + message: "failed to clone request builder".to_string(), + }); + async { + let req = req?; + let resp = req.send().await?; + + if resp.status() == 429 { + return Err(SearchError::RateLimited { provider: "you" }); + } + if !resp.status().is_success() { + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + return Err(SearchError::Api { + provider: "you", + code: "api_error", + message: format!("HTTP {}: {}", status, body), + }); + } + + let body_bytes = resp.bytes().await?; + let mut body_vec = body_bytes.to_vec(); + simd_json::from_slice(&mut body_vec).map_err(|e| SearchError::Api { + provider: "you", + code: "json_error", + message: e.to_string(), + }) + } + }) + .await + } +} + +fn parse_items(arr: Option<&Vec>, source: &str) -> Vec { + arr.map(|items| { + items + .iter() + .map(|item| { + let title = item + .get("title") + .and_then(|v| v.as_str()) + .unwrap_or_default() + .to_string(); + let url = item + .get("url") + .and_then(|v| v.as_str()) + .unwrap_or_default() + .to_string(); + let snippet = item + .get("description") + .and_then(|v| v.as_str()) + .or_else(|| { + item.get("snippets") + .and_then(|v| v.as_array()) + .and_then(|s| s.first()) + .and_then(|v| v.as_str()) + }) + .unwrap_or_default() + .to_string(); + let published = item.get("page_age").and_then(|v| v.as_str()).map(String::from); + let image_url = item + .get("thumbnail_url") + .and_then(|v| v.as_str()) + .map(String::from); + SearchResult { + title, + url, + snippet, + source: source.to_string(), + published, + image_url, + extra: None, + } + }) + .filter(|r| !r.url.is_empty()) + .collect() + }) + .unwrap_or_default() +} + +#[async_trait] +impl 
super::Provider for You { + fn name(&self) -> &'static str { + "you" + } + + fn capabilities(&self) -> &[&'static str] { + &["general", "news"] + } + + fn env_keys(&self) -> &[&'static str] { + &["YDC_API_KEY", "YOU_API_KEY", "SEARCH_KEYS_YOU"] + } + + fn is_configured(&self) -> bool { + true + } + + fn timeout(&self) -> Duration { + Duration::from_secs(10) + } + + async fn search(&self, query: &str, count: usize, opts: &SearchOpts) -> Result, SearchError> { + let body = self.query_endpoint(query, count, opts).await?; + let web = body + .get("results") + .and_then(|v| v.get("web")) + .and_then(|v| v.as_array()); + Ok(parse_items(web, "you")) + } + + async fn search_news( + &self, + query: &str, + count: usize, + opts: &SearchOpts, + ) -> Result, SearchError> { + let body = self.query_endpoint(query, count, opts).await?; + let news = body + .get("results") + .and_then(|v| v.get("news")) + .and_then(|v| v.as_array()); + Ok(parse_items(news, "you_news")) + } +} diff --git a/src/types.rs b/src/types.rs index 44c56c6..bb4d280 100644 --- a/src/types.rs +++ b/src/types.rs @@ -6,9 +6,9 @@ use std::fmt; pub enum Mode { /// Auto-detect intent from query (default) Auto, - /// General web search (Brave + Serper + Exa + Jina + Tavily + Perplexity) + /// General web search (Brave + Serper + Exa + Jina + Tavily + Perplexity + You.com) General, - /// Breaking news and current events (Brave + Serper + Tavily + Perplexity) + /// Breaking news and current events (Brave + Serper + Tavily + Perplexity + You.com) News, /// Research papers and studies (Exa + Serper + Tavily + Perplexity) Academic, diff --git a/tests/integration.rs b/tests/integration.rs index 69d96ee..4481ac2 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -34,7 +34,7 @@ fn test_help_output() { .arg("--help") .assert() .success() - .stdout(predicate::str::contains("Aggregates 11 search providers")) + .stdout(predicate::str::contains("Aggregates 12 search providers")) 
.stdout(predicate::str::contains("brave")) .stdout(predicate::str::contains("serper")) .stdout(predicate::str::contains("exa")); @@ -97,6 +97,7 @@ fn test_providers_json() { assert!(names.contains(&"jina")); assert!(names.contains(&"firecrawl")); assert!(names.contains(&"tavily")); + assert!(names.contains(&"you")); } #[test] @@ -111,6 +112,7 @@ fn test_config_check() { .stdout(predicate::str::contains("jina")) .stdout(predicate::str::contains("firecrawl")) - .stdout(predicate::str::contains("tavily")); + .stdout(predicate::str::contains("tavily")) + .stdout(predicate::str::contains("you")); } #[test]