Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
pkg-config \
libssl-dev \
ripgrep \
tmux \
vim \
jq \
sudo \
build-essential \
Expand Down
55 changes: 29 additions & 26 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
{
"name": "Chrome-CDP Dev",
"build": {
"dockerfile": "Dockerfile",
"context": ".."
},
"features": {
"ghcr.io/devcontainers/features/common-utils:2": {
"installZsh": "true",
"username": "vscode",
"userUid": "1000",
"userGid": "1000",
"upgradePackages": "false"
},
"ghcr.io/devcontainers/features/github-cli:1": {},
"ghcr.io/devcontainers/features/git:1": {}
},
"customizations": {
"vscode": {}
},
"containerEnv": {
"Z_AI_API_KEY": "${localEnv:Z_AI_API_KEY}",
"CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS": "1"
},
"postCreateCommand": "bash .devcontainer/post-create.sh",
"remoteUser": "vscode"
}
"name": "Chrome-CDP Dev",
"build": {
"dockerfile": "Dockerfile",
"context": "..",
},
"features": {
"ghcr.io/devcontainers/features/common-utils:2": {
"installZsh": "true",
"username": "vscode",
"userUid": "1000",
"userGid": "1000",
"upgradePackages": "false",
},
"ghcr.io/devcontainers/features/github-cli:1": {},
"ghcr.io/devcontainers/features/git:1": {},
},
"mounts": [
"source=${localEnv:HOME}/.config/gh,target=/home/vscode/.config/gh,type=bind,consistency=cached",
],
"customizations": {
"vscode": {},
},
"containerEnv": {
"Z_AI_API_KEY": "${localEnv:Z_AI_API_KEY}",
"CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS": "1",
},
"postCreateCommand": "bash .devcontainer/post-create.sh",
"remoteUser": "vscode",
}
11 changes: 2 additions & 9 deletions .devcontainer/post-create.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,6 @@ if [ -z "$CI" ] && [ -z "$GITHUB_ACTIONS" ]; then
echo "[Devcontainer Setup] Claude CLI already installed: $(claude --version)"
fi

echo "[Devcontainer Setup] Configuring tmux..."
cat > $HOME/.tmux.conf << 'EOF'
# Display pane number
bind-key p display-panes
set display-panes-time 10000
EOF

echo "[Devcontainer Setup] Configuring claude alias..."
echo 'alias claude="claude --allow-dangerously-skip-permissions"' >> $HOME/.bashrc
echo 'alias claude="claude --allow-dangerously-skip-permissions"' >> $HOME/.zshrc
Expand Down Expand Up @@ -63,8 +56,8 @@ EOF
"ANTHROPIC_BASE_URL": "https://api.z.ai/api/anthropic",
"API_TIMEOUT_MS": "3000000",
"CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
"ANTHROPIC_DEFAULT_OPUS_MODEL": "glm-5",
"ANTHROPIC_DEFAULT_SONNET_MODEL": "glm-4.7",
"ANTHROPIC_DEFAULT_OPUS_MODEL": "glm-5.1",
"ANTHROPIC_DEFAULT_SONNET_MODEL": "glm-5-turbo",
"ANTHROPIC_DEFAULT_HAIKU_MODEL": "glm-4.5-air"
}
}
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ pub enum Commands {
/// Filter by date (before), YYYY-MM-DD
#[arg(long)]
before: Option<String>,

/// Filter by category (e.g., cs.AI, physics.cond-mat, math.NA)
#[arg(long)]
category: Option<String>,
},
/// Fetch paper details by ID
Fetch {
Expand Down Expand Up @@ -107,8 +111,8 @@ pub async fn run() -> anyhow::Result<()> {
let client = ArxivClient::new(&config).await?;

match cli.command {
Commands::Search { query, limit, after, before } => {
let papers = client.search(&query, limit, after, before, cli.verbose).await?;
Commands::Search { query, limit, after, before, category } => {
let papers = client.search(&query, limit, after, before, category, cli.verbose).await?;
let json = serde_json::to_string_pretty(&papers)?;
println!("{}", json);
}
Expand Down
111 changes: 102 additions & 9 deletions src/core/arxiv_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ impl ArxivClient {
limit: Option<usize>,
after: Option<String>,
before: Option<String>,
category: Option<String>,
verbose: bool,
) -> Result<Vec<Paper>> {
let mut all_papers = Vec::new();
Expand All @@ -33,7 +34,10 @@ impl ArxivClient {

if verbose {
eprintln!("[VERBOSE] Starting search for query: '{}'", query);
eprintln!("[VERBOSE] limit={:?}, after={:?}, before={:?}", limit, after, before);
eprintln!(
"[VERBOSE] limit={:?}, after={:?}, before={:?}, category={:?}",
limit, after, before, category
);
}

loop {
Expand All @@ -50,7 +54,7 @@ impl ArxivClient {

let tab = CdpPage::new(&ws_url).await?;

let url = Self::build_search_url(query, start, &after, &before);
let url = Self::build_search_url(query, start, &after, &before, category.as_deref());

if verbose {
eprintln!("[VERBOSE] Navigating to: {}", url);
Expand Down Expand Up @@ -253,14 +257,83 @@ impl ArxivClient {
start: usize,
after: &Option<String>,
before: &Option<String>,
category: Option<&str>,
) -> String {
let encoded_query = urlencoding::encode(query);
if after.is_some() || before.is_some() {

// Build category filter if specified
let category_filter = if let Some(cat) = category {
// Map category to arXiv archive parameter
// Examples: cs.AI -> computer_science, physics.cond-mat -> physics
let archive_name =
if let Some(dot_idx) = cat.find('.') { &cat[..dot_idx] } else { cat };

let archive_param = match archive_name {
"cs" => "computer_science",
"physics" => "physics",
"math" => "math",
"stat" => "statistics",
"q-bio" => "q-bio",
"q-fin" => "q-fin",
"econ" => "economics",
"eess" => "eess",
"astro-ph" => "physics",
"cond-mat" => "physics",
"gr-qc" => "physics",
"hep-ex" => "physics",
"hep-lat" => "physics",
"hep-ph" => "physics",
"hep-th" => "physics",
"nucl-ex" => "physics",
"nucl-th" => "physics",
"quant-ph" => "physics",
"acc-phys" => "physics",
"adapt" => "physics",
"ao" => "physics",
"atom" => "physics",
"atm-clus" => "physics",
"bell" => "physics",
"chem-ph" => "physics",
"comp-gas" => "physics",
"data-an" => "physics",
"dis-nn" => "physics",
"fluid" => "physics",
"gen-ph" => "physics",
"geo-ph" => "physics",
"hist-ph" => "physics",
"ins" => "physics",
"med-ph" => "physics",
"net-si" => "physics",
"other" => "physics",
"plasm-ph" => "physics",
"pop" => "physics",
"proxy" => "physics",
"soc-ph" => "physics",
"space-ph" => "physics",
_ => archive_name,
};

// For full category names like "cs.AI", use the full category
// For subcategories like "cond-mat", prepend with "physics."
let full_category = if cat.contains('.') {
cat.to_string()
} else if archive_param == "physics" {
format!("physics.{}", cat)
} else {
format!("{}.{}", archive_param, cat)
};

format!("&classification-{}_archives={}", archive_param, full_category)
} else {
String::new()
};

if after.is_some() || before.is_some() || category.is_some() {
let from_date = after.as_deref().unwrap_or("");
let to_date = before.as_deref().unwrap_or("");
format!(
"https://arxiv.org/search/advanced?advanced=1&terms-0-operator=AND&terms-0-term={}&terms-0-field=all&classification-physics_archives=all&classification-include_cross_list=include&date-filter_by=date_range&date-from_date={}&date-to_date={}&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first&start={}",
encoded_query, from_date, to_date, start
"https://arxiv.org/search/advanced?advanced=1&terms-0-operator=AND&terms-0-term={}&terms-0-field=all&classification-physics_archives=all&classification-include_cross_list=include{}&date-filter_by=date_range&date-from_date={}&date-to_date={}&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first&start={}",
encoded_query, category_filter, from_date, to_date, start
)
} else {
format!(
Expand All @@ -285,13 +358,13 @@ mod tests {

#[test]
fn test_build_search_url_simple() {
let url = ArxivClient::build_search_url("LLM", 0, &None, &None);
let url = ArxivClient::build_search_url("LLM", 0, &None, &None, None);
assert_eq!(url, "https://arxiv.org/search/?query=LLM&searchtype=all&source=header&start=0");
}

#[test]
fn test_build_search_url_with_pagination() {
let url = ArxivClient::build_search_url("LLM", 50, &None, &None);
let url = ArxivClient::build_search_url("LLM", 50, &None, &None, None);
assert_eq!(
url,
"https://arxiv.org/search/?query=LLM&searchtype=all&source=header&start=50"
Expand All @@ -302,12 +375,31 @@ mod tests {
fn test_build_search_url_with_dates() {
let after = Some("2023-01-01".to_string());
let before = Some("2023-12-31".to_string());
let url = ArxivClient::build_search_url("LLM", 0, &after, &before);
let url = ArxivClient::build_search_url("LLM", 0, &after, &before, None);
assert!(url.contains("date-filter_by=date_range"));
assert!(url.contains("date-from_date=2023-01-01"));
assert!(url.contains("date-to_date=2023-12-31"));
}

#[test]
fn test_build_search_url_with_category_cs() {
    // A `cs.*` category is expected to surface as the computer-science
    // archive parameter carrying the full category string.
    let expected_param = "classification-computer_science_archives=cs.AI";
    let search_url = ArxivClient::build_search_url("LLM", 0, &None, &None, Some("cs.AI"));
    assert!(search_url.contains(expected_param));
}

#[test]
fn test_build_search_url_with_category_physics() {
    // A dotted `physics.*` category is expected to be passed through verbatim
    // under the physics archive parameter.
    let category = Some("physics.quant-ph");
    let expected_param = "classification-physics_archives=physics.quant-ph";
    let search_url = ArxivClient::build_search_url("quantum", 0, &None, &None, category);
    assert!(search_url.contains(expected_param));
}

#[test]
fn test_build_search_url_with_category_math() {
    // A `math.*` category is expected to map onto the `math` archive parameter
    // with the full category string as its value.
    let expected_param = "classification-math_archives=math.NA";
    let search_url = ArxivClient::build_search_url("algebra", 0, &None, &None, Some("math.NA"));
    assert!(search_url.contains(expected_param));
}

#[test]
fn test_build_fetch_url_id() {
let url = ArxivClient::build_fetch_url("2512.04518");
Expand All @@ -323,7 +415,8 @@ mod tests {
#[test]
fn test_build_search_url_with_before_only() {
let before = Some("2023-10-13".to_string());
let url = ArxivClient::build_search_url("conversational data analysis", 0, &None, &before);
let url =
ArxivClient::build_search_url("conversational data analysis", 0, &None, &before, None);
assert!(url.contains("date-filter_by=date_range"));
assert!(url.contains("date-from_date=&"));
assert!(url.contains("date-to_date=2023-10-13"));
Expand Down
9 changes: 9 additions & 0 deletions src/mcp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ pub struct SearchPapersRequest {
#[schemars(description = "Filter by date (submitted after), format: YYYY-MM-DD")]
#[serde(default)]
pub after: Option<String>,

#[schemars(
description = "Filter by arXiv category (e.g., 'cs.AI', 'physics.quant-ph', 'math.NA')"
)]
#[serde(default)]
pub category: Option<String>,
}

#[derive(Serialize, Deserialize, JsonSchema)]
Expand Down Expand Up @@ -63,6 +69,7 @@ struct SearchCacheKey {
limit: Option<usize>,
before: Option<String>,
after: Option<String>,
category: Option<String>,
}

impl SearchCacheKey {
Expand All @@ -72,6 +79,7 @@ impl SearchCacheKey {
limit: req.limit,
before: req.before.clone(),
after: req.after.clone(),
category: req.category.clone(),
}
}

Expand Down Expand Up @@ -170,6 +178,7 @@ impl ArxivHandler {
request.limit,
request.after.clone(),
request.before.clone(),
request.category.clone(),
false,
)
.await
Expand Down
Loading