diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index d5b3618..8b5ec47 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -13,8 +13,6 @@ RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ pkg-config \ libssl-dev \ ripgrep \ - tmux \ - vim \ jq \ sudo \ build-essential \ diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index c791e08..d4cdf3a 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,27 +1,30 @@ { - "name": "Chrome-CDP Dev", - "build": { - "dockerfile": "Dockerfile", - "context": ".." - }, - "features": { - "ghcr.io/devcontainers/features/common-utils:2": { - "installZsh": "true", - "username": "vscode", - "userUid": "1000", - "userGid": "1000", - "upgradePackages": "false" - }, - "ghcr.io/devcontainers/features/github-cli:1": {}, - "ghcr.io/devcontainers/features/git:1": {} - }, - "customizations": { - "vscode": {} - }, - "containerEnv": { - "Z_AI_API_KEY": "${localEnv:Z_AI_API_KEY}", - "CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS": "1" - }, - "postCreateCommand": "bash .devcontainer/post-create.sh", - "remoteUser": "vscode" -} \ No newline at end of file + "name": "Chrome-CDP Dev", + "build": { + "dockerfile": "Dockerfile", + "context": "..", + }, + "features": { + "ghcr.io/devcontainers/features/common-utils:2": { + "installZsh": "true", + "username": "vscode", + "userUid": "1000", + "userGid": "1000", + "upgradePackages": "false", + }, + "ghcr.io/devcontainers/features/github-cli:1": {}, + "ghcr.io/devcontainers/features/git:1": {}, + }, + "mounts": [ + "source=${localEnv:HOME}/.config/gh,target=/home/vscode/.config/gh,type=bind,consistency=cached", + ], + "customizations": { + "vscode": {}, + }, + "containerEnv": { + "Z_AI_API_KEY": "${localEnv:Z_AI_API_KEY}", + "CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS": "1", + }, + "postCreateCommand": "bash .devcontainer/post-create.sh", + "remoteUser": "vscode", +} diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh index 4c0121c..bbe8e53 100644 --- a/.devcontainer/post-create.sh +++ b/.devcontainer/post-create.sh @@ -19,13 +19,6 @@ if [ -z "$CI" ] && [ -z "$GITHUB_ACTIONS" ]; then echo "[Devcontainer Setup] Claude CLI already installed: $(claude --version)" fi - echo "[Devcontainer Setup] Configuring tmux..." - cat > $HOME/.tmux.conf << 'EOF' -# Display pane number -bind-key p display-panes -set display-panes-time 10000 -EOF - echo "[Devcontainer Setup] Configuring claude alias..." echo 'alias claude="claude --allow-dangerously-skip-permissions"' >> $HOME/.bashrc echo 'alias claude="claude --allow-dangerously-skip-permissions"' >> $HOME/.zshrc @@ -63,8 +56,8 @@ EOF "ANTHROPIC_BASE_URL": "https://api.z.ai/api/anthropic", "API_TIMEOUT_MS": "3000000", "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1", - "ANTHROPIC_DEFAULT_OPUS_MODEL": "glm-5", - "ANTHROPIC_DEFAULT_SONNET_MODEL": "glm-4.7", + "ANTHROPIC_DEFAULT_OPUS_MODEL": "glm-5.1", + "ANTHROPIC_DEFAULT_SONNET_MODEL": "glm-5-turbo", "ANTHROPIC_DEFAULT_HAIKU_MODEL": "glm-4.5-air" } } diff --git a/Cargo.lock b/Cargo.lock index 649a1f8..a6a294a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -413,7 +413,7 @@ dependencies = [ [[package]] name = "cypher-rs" version = "0.1.0" -source = "git+https://github.com/sonesuke/cypher-rs?branch=main#09afd652bc858022c04ad04a2aa083a3ccd231ab" +source = "git+https://github.com/sonesuke/cypher-rs?branch=main#ab6fdb71e5f7f8e8f1a04b3afcd707b724fb76b0" dependencies = [ "anyhow", "async-trait", diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 42900b7..e589a82 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -37,6 +37,10 @@ pub enum Commands { /// Filter by date (before), YYYY-MM-DD #[arg(long)] before: Option, + + /// Filter by category (e.g., cs.AI, physics.cond-mat, math.NA) + #[arg(long)] + category: Option, }, /// Fetch paper details by ID Fetch { @@ -107,8 +111,8 @@ pub async fn run() -> anyhow::Result<()> { let client = ArxivClient::new(&config).await?; match cli.command { - Commands::Search { query, limit, after, before } => { - let papers = client.search(&query, limit, after, before, cli.verbose).await?; + Commands::Search { query, limit, after, before, category } => { + let papers = client.search(&query, limit, after, before, category, cli.verbose).await?; let json = serde_json::to_string_pretty(&papers)?; println!("{}", json); } diff --git a/src/core/arxiv_search.rs b/src/core/arxiv_search.rs index fabf0e5..e223fd0 100644 --- a/src/core/arxiv_search.rs +++ b/src/core/arxiv_search.rs @@ -24,6 +24,7 @@ impl ArxivClient { limit: Option, after: Option, before: Option, + category: Option, verbose: bool, ) -> Result> { let mut all_papers = Vec::new(); @@ -33,7 +34,10 @@ impl ArxivClient { if verbose { eprintln!("[VERBOSE] Starting search for query: '{}'", query); - eprintln!("[VERBOSE] limit={:?}, after={:?}, before={:?}", limit, after, before); + eprintln!( + "[VERBOSE] limit={:?}, after={:?}, before={:?}, category={:?}", + limit, after, before, category + ); } loop { @@ -50,7 +54,7 @@ impl ArxivClient { let tab = CdpPage::new(&ws_url).await?; - let url = Self::build_search_url(query, start, &after, &before); + let url = Self::build_search_url(query, start, &after, &before, category.as_deref()); if verbose { eprintln!("[VERBOSE] Navigating to: {}", url); @@ -253,14 +257,83 @@ impl ArxivClient { start: usize, after: &Option, before: &Option, + category: Option<&str>, ) -> String { let encoded_query = urlencoding::encode(query); - if after.is_some() || before.is_some() { + + // Build category filter if specified + let category_filter = if let Some(cat) = category { + // Map category to arXiv archive parameter + // Examples: cs.AI -> computer_science, physics.cond-mat -> physics + let archive_name = + if let Some(dot_idx) = cat.find('.') { &cat[..dot_idx] } else { cat }; + + let archive_param = match archive_name { + "cs" => "computer_science", + "physics" => "physics", + "math" => "math", + "stat" => "statistics", + "q-bio" => "q-bio", + "q-fin" => "q-fin", + "econ" => "economics", + "eess" => "eess", + "astro-ph" => "physics", + "cond-mat" => "physics", + "gr-qc" => "physics", + "hep-ex" => "physics", + "hep-lat" => "physics", + "hep-ph" => "physics", + "hep-th" => "physics", + "nucl-ex" => "physics", + "nucl-th" => "physics", + "quant-ph" => "physics", + "acc-phys" => "physics", + "adapt" => "physics", + "ao" => "physics", + "atom" => "physics", + "atm-clus" => "physics", + "bell" => "physics", + "chem-ph" => "physics", + "comp-gas" => "physics", + "data-an" => "physics", + "dis-nn" => "physics", + "fluid" => "physics", + "gen-ph" => "physics", + "geo-ph" => "physics", + "hist-ph" => "physics", + "ins" => "physics", + "med-ph" => "physics", + "net-si" => "physics", + "other" => "physics", + "plasm-ph" => "physics", + "pop" => "physics", + "proxy" => "physics", + "soc-ph" => "physics", + "space-ph" => "physics", + _ => archive_name, + }; + + // For full category names like "cs.AI", use the full category + // For subcategories like "cond-mat", prepend with "physics." + let full_category = if cat.contains('.') { + cat.to_string() + } else if archive_param == "physics" { + format!("physics.{}", cat) + } else { + format!("{}.{}", archive_param, cat) + }; + + format!("&classification-{}_archives={}", archive_param, full_category) + } else { + String::new() + }; + + if after.is_some() || before.is_some() || category.is_some() { let from_date = after.as_deref().unwrap_or(""); let to_date = before.as_deref().unwrap_or(""); format!( - "https://arxiv.org/search/advanced?advanced=1&terms-0-operator=AND&terms-0-term={}&terms-0-field=all&classification-physics_archives=all&classification-include_cross_list=include&date-filter_by=date_range&date-from_date={}&date-to_date={}&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first&start={}", - encoded_query, from_date, to_date, start + "https://arxiv.org/search/advanced?advanced=1&terms-0-operator=AND&terms-0-term={}&terms-0-field=all&classification-physics_archives=all&classification-include_cross_list=include{}&date-filter_by=date_range&date-from_date={}&date-to_date={}&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first&start={}", + encoded_query, category_filter, from_date, to_date, start ) } else { format!( @@ -285,13 +358,13 @@ mod tests { #[test] fn test_build_search_url_simple() { - let url = ArxivClient::build_search_url("LLM", 0, &None, &None); + let url = ArxivClient::build_search_url("LLM", 0, &None, &None, None); assert_eq!(url, "https://arxiv.org/search/?query=LLM&searchtype=all&source=header&start=0"); } #[test] fn test_build_search_url_with_pagination() { - let url = ArxivClient::build_search_url("LLM", 50, &None, &None); + let url = ArxivClient::build_search_url("LLM", 50, &None, &None, None); assert_eq!( url, "https://arxiv.org/search/?query=LLM&searchtype=all&source=header&start=50" @@ -302,12 +375,31 @@ mod tests { fn test_build_search_url_with_dates() { let after = Some("2023-01-01".to_string()); let before = Some("2023-12-31".to_string()); - let url = ArxivClient::build_search_url("LLM", 0, &after, &before); + let url = ArxivClient::build_search_url("LLM", 0, &after, &before, None); assert!(url.contains("date-filter_by=date_range")); assert!(url.contains("date-from_date=2023-01-01")); assert!(url.contains("date-to_date=2023-12-31")); } + #[test] + fn test_build_search_url_with_category_cs() { + let url = ArxivClient::build_search_url("LLM", 0, &None, &None, Some("cs.AI")); + assert!(url.contains("classification-computer_science_archives=cs.AI")); + } + + #[test] + fn test_build_search_url_with_category_physics() { + let url = + ArxivClient::build_search_url("quantum", 0, &None, &None, Some("physics.quant-ph")); + assert!(url.contains("classification-physics_archives=physics.quant-ph")); + } + + #[test] + fn test_build_search_url_with_category_math() { + let url = ArxivClient::build_search_url("algebra", 0, &None, &None, Some("math.NA")); + assert!(url.contains("classification-math_archives=math.NA")); + } + #[test] fn test_build_fetch_url_id() { let url = ArxivClient::build_fetch_url("2512.04518"); @@ -323,7 +415,8 @@ mod tests { #[test] fn test_build_search_url_with_before_only() { let before = Some("2023-10-13".to_string()); - let url = ArxivClient::build_search_url("conversational data analysis", 0, &None, &before); + let url = + ArxivClient::build_search_url("conversational data analysis", 0, &None, &before, None); assert!(url.contains("date-filter_by=date_range")); assert!(url.contains("date-from_date=&")); assert!(url.contains("date-to_date=2023-10-13")); diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 05634b8..93d1721 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -33,6 +33,12 @@ pub struct SearchPapersRequest { #[schemars(description = "Filter by date (submitted after), format: YYYY-MM-DD")] #[serde(default)] pub after: Option, + + #[schemars( + description = "Filter by arXiv category (e.g., 'cs.AI', 'physics.quant-ph', 'math.NA')" + )] + #[serde(default)] + pub category: Option, } #[derive(Serialize, Deserialize, JsonSchema)] @@ -63,6 +69,7 @@ struct SearchCacheKey { limit: Option, before: Option, after: Option, + category: Option, } impl SearchCacheKey { @@ -72,6 +79,7 @@ impl SearchCacheKey { limit: req.limit, before: req.before.clone(), after: req.after.clone(), + category: req.category.clone(), } } @@ -170,6 +178,7 @@ impl ArxivHandler { request.limit, request.after.clone(), request.before.clone(), + request.category.clone(), false, ) .await