From 284d5ccdfa67b523bac6650d913d859fd5fed90c Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 27 Mar 2026 21:24:34 +0000 Subject: [PATCH 1/2] chore(deps): update workspace dependencies cargo update: aws-lc-sys, cc, cmake, iri-string, jni-sys, mio, simd-adler32 bumped to latest compatible versions. https://claude.ai/code/session_016qV822mRp2PAxDjHag54YV --- Cargo.lock | 52 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b9c32b9..0ee76b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -139,9 +139,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.39.0" +version = "0.39.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa7e52a4c5c547c741610a2c6f123f3881e409b714cd27e6798ef020c514f0a" +checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399" dependencies = [ "cc", "cmake", @@ -211,9 +211,9 @@ checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cc" -version = "1.2.57" +version = "1.2.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" +checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" dependencies = [ "find-msvc-tools", "jobserver", @@ -281,9 +281,9 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "cmake" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" dependencies = [ "cc", ] @@ -1044,9 +1044,9 @@ checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "iri-string" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" +checksum = "d8e7418f59cc01c88316161279a7f665217ae316b388e58a0d10e29f54f1e5eb" dependencies = [ "memchr", "serde", @@ -1073,7 +1073,7 @@ dependencies = [ "cesu8", "cfg-if", "combine", - "jni-sys", + "jni-sys 0.3.1", "log", "thiserror 1.0.69", "walkdir", @@ -1082,9 +1082,31 @@ dependencies = [ [[package]] name = "jni-sys" -version = "0.3.0" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" +dependencies = [ + "jni-sys 0.4.1", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn", +] [[package]] name = "jobserver" @@ -1181,9 +1203,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", @@ -1870,9 +1892,9 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] name = "slab" From a75d687bc0234f901a701582b50da10102a26c02 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 27 Mar 2026 21:24:41 +0000 Subject: [PATCH 2/2] docs(specs): sync specs with current code - initial.md: MCP tool name web_fetch (not fetchkit), updated description and input schema to match implementation - fetchers.md: document all 13 built-in fetchers (was 3), update module structure and response format values https://claude.ai/code/session_016qV822mRp2PAxDjHag54YV --- specs/fetchers.md | 85 ++++++++++++++++++++++++++++++++++++++++++++++- specs/initial.md | 6 ++-- 2 files changed, 87 insertions(+), 4 deletions(-) diff --git a/specs/fetchers.md b/specs/fetchers.md index b66fc39..9bc9ce3 100644 --- a/specs/fetchers.md +++ b/specs/fetchers.md @@ -69,6 +69,68 @@ Central dispatcher that: - Quoted tweets rendered as blockquotes - Both APIs are unauthenticated; syndication API is undocumented but widely used +#### GitHubCodeFetcher + +- Matches: `https://github.com/{owner}/{repo}/blob/{ref}/{path}` +- Excludes: Reserved owner paths (settings, issues, pulls, etc.) +- Behavior: Fetches raw source files via GitHub API, detects language from extension, handles base64 decoding, returns metadata for files >1MB or binary +- Response format field: `"github_file"` + +#### GitHubIssueFetcher + +- Matches: `https://github.com/{owner}/{repo}/issues/{number}` and `https://github.com/{owner}/{repo}/pull/{number}` +- Excludes: Reserved owner paths, non-numeric IDs +- Behavior: Fetches issue/PR metadata, labels, assignees, milestone, and up to 100 comments; PRs include diff stats and merge status +- Response format field: `"github_issue"` or `"github_pull_request"` + +#### StackOverflowFetcher + +- Matches: `https://{stackoverflow.com|serverfault.com|superuser.com|askubuntu.com|mathoverflow.net|*.stackexchange.com}/questions/{id}` +- Behavior: Fetches question and top 10 answers sorted by votes via Stack Exchange API +- Response format field: `"stackoverflow_qa"` + +#### PackageRegistryFetcher + +- Matches: `https://pypi.org/project/{name}`, `https://crates.io/crates/{name}`, `https://www.npmjs.com/package/{name}` (including @scope/name) +- Behavior: Fetches package metadata from respective registry APIs +- Response format field: `"package_registry"` + +#### WikipediaFetcher + +- Matches: `https://{lang}.wikipedia.org/wiki/{title}` +- Behavior: Fetches article summary via MediaWiki REST API and full HTML, converts to markdown +- Response format field: `"wikipedia"` + +#### YouTubeFetcher + +- Matches: `https://youtube.com/watch?v={id}`, `https://youtu.be/{id}` +- Behavior: Fetches video metadata via oEmbed API +- Response format field: `"youtube_video"` + +#### ArXivFetcher + +- Matches: `https://arxiv.org/abs/{id}` and `https://arxiv.org/pdf/{id}` +- Behavior: Fetches paper metadata via arXiv Atom XML API +- Response format field: `"arxiv_paper"` + +#### HackerNewsFetcher + +- Matches: `https://news.ycombinator.com/item?id={id}` +- Behavior: Fetches item via HN Firebase API with top 20 comments and one level of replies +- Response format field: `"hackernews"` + +#### RSSFeedFetcher + +- Matches: URLs ending with `/feed`, `/rss`, `/atom`, `.rss`, `.xml` variants +- Behavior: Detects RSS 2.0 or Atom 1.0, parses up to 20 entries +- Response format field: `"rss_feed"` + +#### DocsSiteFetcher + +- Matches: Direct `/llms.txt` or `/llms-full.txt` URLs, or known docs sites (ReadTheDocs, docs.rs, GitBook, etc.) +- Behavior: Probes for llms-full.txt/llms.txt at origin; if not found, fetches page and converts HTML to markdown +- Response format field: `"documentation"` or `"markdown"` + ### Response Extensions `FetchResponse.format` values: @@ -76,7 +138,18 @@ Central dispatcher that: - `"text"` - HTML converted to plain text - `"raw"` - Original content unchanged - `"github_repo"` - GitHub repository metadata + README +- `"github_file"` - GitHub source file content +- `"github_issue"` - GitHub issue content +- `"github_pull_request"` - GitHub pull request content - `"twitter_tweet"` - Twitter/X tweet content with metadata +- `"stackoverflow_qa"` - Stack Overflow Q&A +- `"package_registry"` - Package registry metadata +- `"wikipedia"` - Wikipedia article +- `"youtube_video"` - YouTube video metadata +- `"arxiv_paper"` - arXiv paper metadata +- `"hackernews"` - Hacker News item with comments +- `"rss_feed"` - RSS/Atom feed entries +- `"documentation"` - Documentation site content ### Configuration @@ -127,9 +200,19 @@ crates/fetchkit/src/ ├── file_saver.rs # FileSaver trait, LocalFileSaver, SaveResult, FileSaveError ├── fetchers/ │ ├── mod.rs # Fetcher trait, FetcherRegistry +│ ├── arxiv.rs # ArXivFetcher │ ├── default.rs # DefaultFetcher (with binary-aware fetch_to_file override) +│ ├── docs_site.rs # DocsSiteFetcher +│ ├── github_code.rs # GitHubCodeFetcher +│ ├── github_issue.rs # GitHubIssueFetcher │ ├── github_repo.rs # GitHubRepoFetcher -│ └── twitter.rs # TwitterFetcher +│ ├── hackernews.rs # HackerNewsFetcher +│ ├── package_registry.rs # PackageRegistryFetcher +│ ├── rss_feed.rs # RSSFeedFetcher +│ ├── stackoverflow.rs # StackOverflowFetcher +│ ├── twitter.rs # TwitterFetcher +│ ├── wikipedia.rs # WikipediaFetcher +│ └── youtube.rs # YouTubeFetcher ``` ## API diff --git a/specs/initial.md b/specs/initial.md index 526af1d..7963edf 100644 --- a/specs/initial.md +++ b/specs/initial.md @@ -137,10 +137,10 @@ Provide a builder to configure tool options, including: ### MCP Server -- Expose a single `fetchkit` tool over MCP. -- Input schema: `{ url: string }` (required). +- Expose a single `web_fetch` tool over MCP. +- Input schema: derived from `FetchRequest` via tool builder (disabled options omitted). - Output: Markdown with YAML frontmatter (same format as CLI `--output md`). -- Tool description: "Fetch URL and return markdown with metadata frontmatter. Optimized for LLM consumption." +- Tool description: "Fetch URL content as text or markdown; return metadata for binary responses or save bytes to file." ### Python Bindings