Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions claw2manus/fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class SkillFetcher:
def fetch_skill_from_github(self, author: str, name: str) -> str | None:
url = self.CLAW_HUB_RAW_GITHUB_URL.format(author=author, name=name)
try:
response = requests.get(url)
response = requests.get(url, timeout=10)
response.raise_for_status() # Raise an exception for HTTP errors
return response.text
except requests.exceptions.RequestException as e:
Expand All @@ -22,7 +22,7 @@ def fetch_skill_from_clawhub_website(self, name: str) -> str | None:
"""Scrapes SKILL.md content from clawhub.ai."""
url = self.CLAW_HUB_WEBSITE_URL.format(name=name)
try:
response = requests.get(url)
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

Expand All @@ -46,7 +46,7 @@ def discover_author_via_github(self, name: str) -> str | None:
url = self.GITHUB_SEARCH_API_URL.format(name=name)
headers = {"Accept": "application/vnd.github.v3+json"}
try:
response = requests.get(url, headers=headers)
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
data = response.json()
if data.get("total_count", 0) > 0:
Expand All @@ -68,7 +68,7 @@ def fetch_skill(self, skill_identifier: str) -> tuple[str | None, str | None]:
skill_name = None

# Try to parse as a GitHub URL first
if "github.com" in skill_identifier and "SKILL.md" in skill_identifier:
if ("github.com" in skill_identifier or "githubusercontent.com" in skill_identifier) and "SKILL.md" in skill_identifier:
# Example: https://raw.githubusercontent.com/openclaw/skills/main/skills/peterskoett/self-improving-agent/SKILL.md
match = re.search(r"skills/(?P<author>[^/]+)/(?P<name>[^/]+)/SKILL.md", skill_identifier)
if match:
Expand Down
11 changes: 11 additions & 0 deletions pr_description.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
🎯 **What:** The testing gap addressed
Created tests for `fetcher.py` and specifically the `SkillFetcher` class. It had missing test coverage. Also discovered that `requests.get` requests were missing a timeout, which could cause indefinite hangs. Also fixed a small string matching bug for GitHub raw content paths (added `githubusercontent.com`).

📊 **Coverage:** What scenarios are now tested
- Tested `fetch_skill_from_github` happy and unhappy paths.
- Tested `fetch_skill_from_clawhub_website` happy (markdown block, and raw code block) and unhappy paths.
- Tested `discover_author_via_github` happy and unhappy paths, including json deserialization and GitHub API returns empty logic.
- Tested `fetch_skill` combining GitHub logic, author logic, and website scraping.

✨ **Result:** The improvement in test coverage
Test coverage is now complete for `fetcher.py`. Also the module is more robust thanks to the proper timeout argument in network requests.
157 changes: 157 additions & 0 deletions tests/test_fetcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import pytest
from unittest.mock import patch, MagicMock
from claw2manus.fetcher import SkillFetcher
import requests
import bs4
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The bs4 module is imported but not used in this test file. The BeautifulSoup class is patched within the claw2manus.fetcher module using a string reference, so this top-level import is unnecessary and can be removed to keep the code clean.


def test_fetch_skill_from_github_success():
fetcher = SkillFetcher()
with patch('requests.get') as mock_get:
mock_response = MagicMock()
mock_response.text = "test skill content"
mock_response.raise_for_status.return_value = None
mock_get.return_value = mock_response

content = fetcher.fetch_skill_from_github("author1", "skill1")
assert content == "test skill content"
mock_get.assert_called_once()
Comment on lines +15 to +17
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Since a primary goal of this pull request is to ensure network requests have timeouts, the tests should explicitly verify that the timeout argument is passed to requests.get. This makes the tests more robust and ensures the fix is actually working as intended. Using assert_called_once_with allows checking both the URL and the timeout value.

Suggested change
content = fetcher.fetch_skill_from_github("author1", "skill1")
assert content == "test skill content"
mock_get.assert_called_once()
content = fetcher.fetch_skill_from_github("author1", "skill1")
assert content == "test skill content"
mock_get.assert_called_once_with(
"https://raw.githubusercontent.com/openclaw/skills/main/skills/author1/skill1/SKILL.md",
timeout=10
)


def test_fetch_skill_from_github_failure():
fetcher = SkillFetcher()
with patch('requests.get') as mock_get:
mock_get.side_effect = requests.exceptions.RequestException("Network error")

content = fetcher.fetch_skill_from_github("author1", "skill1")
assert content is None

@patch('claw2manus.fetcher.BeautifulSoup')
def test_fetch_skill_from_clawhub_website_success_markdown(mock_bs):
fetcher = SkillFetcher()
with patch('requests.get') as mock_get:
mock_response = MagicMock()
mock_response.text = '<html><body><div class="markdown-body">test content</div></body></html>'
mock_response.raise_for_status.return_value = None
mock_get.return_value = mock_response

# Setup mock BS
mock_soup = MagicMock()
mock_content_element = MagicMock()
mock_content_element.get_text.return_value = "test content"
mock_soup.find.return_value = mock_content_element
mock_bs.return_value = mock_soup

content = fetcher.fetch_skill_from_clawhub_website("skill1")
assert content == "test content"

@patch('claw2manus.fetcher.BeautifulSoup')
def test_fetch_skill_from_clawhub_website_success_code(mock_bs):
fetcher = SkillFetcher()
with patch('requests.get') as mock_get:
mock_response = MagicMock()
mock_response.text = '<html><body><pre>test content</pre></body></html>'
mock_response.raise_for_status.return_value = None
mock_get.return_value = mock_response

# Setup mock BS
mock_soup = MagicMock()
# First call to find (markdown-body or article) returns None
# Second call to find (pre or code) returns element
mock_soup.find.side_effect = [None, None, MagicMock(get_text=lambda: "test content")]
mock_bs.return_value = mock_soup

content = fetcher.fetch_skill_from_clawhub_website("skill1")
assert content == "test content"

def test_fetch_skill_from_clawhub_website_failure():
fetcher = SkillFetcher()
with patch('requests.get') as mock_get:
mock_get.side_effect = requests.exceptions.RequestException("Network error")

content = fetcher.fetch_skill_from_clawhub_website("skill1")
assert content is None

def test_discover_author_via_github_success():
fetcher = SkillFetcher()
with patch('requests.get') as mock_get:
mock_response = MagicMock()
mock_response.json.return_value = {
"total_count": 1,
"items": [{"path": "skills/author2/skill2/SKILL.md"}]
}
mock_response.raise_for_status.return_value = None
mock_get.return_value = mock_response

author = fetcher.discover_author_via_github("skill2")
assert author == "author2"

def test_discover_author_via_github_not_found():
fetcher = SkillFetcher()
with patch('requests.get') as mock_get:
mock_response = MagicMock()
mock_response.json.return_value = {
"total_count": 0,
"items": []
}
mock_response.raise_for_status.return_value = None
mock_get.return_value = mock_response

author = fetcher.discover_author_via_github("skill2")
assert author is None

def test_discover_author_via_github_exception():
fetcher = SkillFetcher()
with patch('requests.get') as mock_get:
mock_get.side_effect = Exception("Some error")

author = fetcher.discover_author_via_github("skill2")
assert author is None

def test_fetch_skill_with_github_url():
fetcher = SkillFetcher()
with patch.object(fetcher, 'fetch_skill_from_github', return_value="github content") as mock_fetch:
content, name = fetcher.fetch_skill("https://raw.githubusercontent.com/openclaw/skills/main/skills/author3/skill3/SKILL.md")
assert content == "github content"
assert name == "skill3"

def test_fetch_skill_with_author_and_name():
fetcher = SkillFetcher()
with patch.object(fetcher, 'fetch_skill_from_github', return_value="github content") as mock_fetch:
content, name = fetcher.fetch_skill("author4/skill4")
assert content == "github content"
assert name == "skill4"
mock_fetch.assert_called_once_with("author4", "skill4")

def test_fetch_skill_with_discovery():
fetcher = SkillFetcher()
with patch.object(fetcher, 'fetch_skill_from_github', side_effect=[None, None, "discovered content"]) as mock_github, \
patch.object(fetcher, 'discover_author_via_github', return_value="author5") as mock_discover:

content, name = fetcher.fetch_skill("skill5")

assert content == "discovered content"
assert name == "skill5"
mock_discover.assert_called_once_with("skill5")
assert mock_github.call_count == 3

def test_fetch_skill_fallback_to_scraping():
fetcher = SkillFetcher()
with patch.object(fetcher, 'fetch_skill_from_github', return_value=None), \
patch.object(fetcher, 'discover_author_via_github', return_value=None), \
patch.object(fetcher, 'fetch_skill_from_clawhub_website', return_value="scraped content") as mock_scrape:

content, name = fetcher.fetch_skill("skill6")

assert content == "scraped content"
assert name == "skill6"
mock_scrape.assert_called_once_with("skill6")

def test_fetch_skill_not_found():
fetcher = SkillFetcher()
with patch.object(fetcher, 'fetch_skill_from_github', return_value=None), \
patch.object(fetcher, 'discover_author_via_github', return_value=None), \
patch.object(fetcher, 'fetch_skill_from_clawhub_website', return_value=None):

content, name = fetcher.fetch_skill("skill7")

assert content is None
assert name is None