From d8e140f6bf969449c6577c0bc68dc03cab2a3a5b Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra
Date: Thu, 7 Aug 2025 12:12:34 +0200
Subject: [PATCH 1/2] Update server.py

---
 src/scrapegraph_mcp/server.py | 140 ++++++++++++++++++++++++++++++++--
 1 file changed, 132 insertions(+), 8 deletions(-)

diff --git a/src/scrapegraph_mcp/server.py b/src/scrapegraph_mcp/server.py
index 4d06e30..68325c6 100644
--- a/src/scrapegraph_mcp/server.py
+++ b/src/scrapegraph_mcp/server.py
@@ -5,6 +5,7 @@
 - markdownify: Convert any webpage into clean, formatted markdown
 - smartscraper: Extract structured data from any webpage using AI
 - searchscraper: Perform AI-powered web searches with structured results
+- crawl: Perform intelligent web crawling with AI-powered data extraction
 """

 import os
@@ -56,22 +57,32 @@ def markdownify(self, website_url: str) -> Dict[str, Any]:

         return response.json()

-    def smartscraper(self, user_prompt: str, website_url: str) -> Dict[str, Any]:
+    def smartscraper(self, user_prompt: str, website_url: str, number_of_scrolls: int = None, markdown_only: bool = None) -> Dict[str, Any]:
         """
         Extract structured data from a webpage using AI.

         Args:
             user_prompt: Instructions for what data to extract
             website_url: URL of the webpage to scrape
+            number_of_scrolls: Number of infinite scrolls to perform (optional)
+            markdown_only: Whether to return only markdown content without AI processing (optional)

         Returns:
-            Dictionary containing the extracted data
+            Dictionary containing the extracted data or markdown content
         """
         url = f"{self.BASE_URL}/smartscraper"
         data = {
             "user_prompt": user_prompt,
             "website_url": website_url
         }
+
+        # Add number_of_scrolls to the request if provided
+        if number_of_scrolls is not None:
+            data["number_of_scrolls"] = number_of_scrolls
+
+        # Add markdown_only to the request if provided
+        if markdown_only is not None:
+            data["markdown_only"] = markdown_only

         response = self.client.post(url, headers=self.headers, json=data)

@@ -81,12 +92,14 @@ def smartscraper(self, user_prompt: str, website_url: str) -> Dict[str, Any]:

         return response.json()

-    def searchscraper(self, user_prompt: str) -> Dict[str, Any]:
+    def searchscraper(self, user_prompt: str, num_results: int = None, number_of_scrolls: int = None) -> Dict[str, Any]:
         """
         Perform AI-powered web searches with structured results.

         Args:
             user_prompt: Search query or instructions
+            num_results: Number of websites to search (optional, default: 3 websites = 30 credits)
+            number_of_scrolls: Number of infinite scrolls to perform on each website (optional)

         Returns:
             Dictionary containing search results and reference URLs
@@ -95,6 +108,14 @@
         data = {
             "user_prompt": user_prompt
         }
+
+        # Add num_results to the request if provided
+        if num_results is not None:
+            data["num_results"] = num_results
+
+        # Add number_of_scrolls to the request if provided
+        if number_of_scrolls is not None:
+            data["number_of_scrolls"] = number_of_scrolls

         response = self.client.post(url, headers=self.headers, json=data)

@@ -104,6 +125,58 @@ def searchscraper(self, user_prompt: str) -> Dict[str, Any]:

         return response.json()

+    def crawl(
+        self,
+        url: str,
+        prompt: str = None,
+        cache_website: bool = None,
+        depth: int = None,
+        max_pages: int = None,
+        same_domain_only: bool = None,
+        markdown_only: bool = None
+    ) -> Dict[str, Any]:
+        """
+        Perform intelligent web crawling with AI-powered data extraction.
+
+        Args:
+            url: Starting URL to crawl
+            prompt: AI prompt for data extraction (optional, if not provided returns markdown only)
+            cache_website: Whether to cache the website content (optional)
+            depth: Maximum crawling depth (optional)
+            max_pages: Maximum number of pages to crawl (optional)
+            same_domain_only: Whether to crawl only within the same domain (optional)
+            markdown_only: Whether to return only markdown content without AI processing (optional)
+
+        Returns:
+            Dictionary containing the crawl results
+        """
+        endpoint = f"{self.BASE_URL}/crawl"
+        data = {
+            "url": url
+        }
+
+        # Add optional parameters if provided
+        if prompt is not None:
+            data["prompt"] = prompt
+        if cache_website is not None:
+            data["cache_website"] = cache_website
+        if depth is not None:
+            data["depth"] = depth
+        if max_pages is not None:
+            data["max_pages"] = max_pages
+        if same_domain_only is not None:
+            data["same_domain_only"] = same_domain_only
+        if markdown_only is not None:
+            data["markdown_only"] = markdown_only
+
+        response = self.client.post(endpoint, headers=self.headers, json=data)
+
+        if response.status_code != 200:
+            error_msg = f"Error {response.status_code}: {response.text}"
+            raise Exception(error_msg)
+
+        return response.json()
+
     def close(self) -> None:
         """Close the HTTP client."""
         self.client.close()
@@ -142,7 +215,9 @@ def markdownify(website_url: str) -> Dict[str, Any]:
 @mcp.tool()
 def smartscraper(
     user_prompt: str,
-    website_url: str
+    website_url: str,
+    number_of_scrolls: int = None,
+    markdown_only: bool = None
 ) -> Dict[str, Any]:
     """
     Extract structured data from a webpage using AI.
@@ -150,15 +225,17 @@ def smartscraper(

     Args:
         user_prompt: Instructions for what data to extract
         website_url: URL of the webpage to scrape
+        number_of_scrolls: Number of infinite scrolls to perform (optional)
+        markdown_only: Whether to return only markdown content without AI processing (optional)

     Returns:
-        Dictionary containing the extracted data
+        Dictionary containing the extracted data or markdown content
     """
     if scrapegraph_client is None:
         return {"error": "ScapeGraph client not initialized. Please provide an API key."}

     try:
-        return scrapegraph_client.smartscraper(user_prompt, website_url)
+        return scrapegraph_client.smartscraper(user_prompt, website_url, number_of_scrolls, markdown_only)
     except Exception as e:
         return {"error": str(e)}
@@ -166,13 +243,17 @@ def smartscraper(
 # Add tool for searchscraper
 @mcp.tool()
 def searchscraper(
-    user_prompt: str
+    user_prompt: str,
+    num_results: int = None,
+    number_of_scrolls: int = None
 ) -> Dict[str, Any]:
     """
     Perform AI-powered web searches with structured results.

     Args:
         user_prompt: Search query or instructions
+        num_results: Number of websites to search (optional, default: 3 websites = 30 credits)
+        number_of_scrolls: Number of infinite scrolls to perform on each website (optional)

     Returns:
         Dictionary containing search results and reference URLs
@@ -181,7 +262,50 @@
         return {"error": "ScapeGraph client not initialized. Please provide an API key."}

     try:
-        return scrapegraph_client.searchscraper(user_prompt)
+        return scrapegraph_client.searchscraper(user_prompt, num_results, number_of_scrolls)
+    except Exception as e:
+        return {"error": str(e)}
+
+
+# Add tool for crawl (smartcrawler)
+@mcp.tool()
+def crawl(
+    url: str,
+    prompt: str = None,
+    cache_website: bool = None,
+    depth: int = None,
+    max_pages: int = None,
+    same_domain_only: bool = None,
+    markdown_only: bool = None
+) -> Dict[str, Any]:
+    """
+    Perform intelligent web crawling with AI-powered data extraction.
+
+    Args:
+        url: Starting URL to crawl
+        prompt: AI prompt for data extraction (optional, if not provided returns markdown only)
+        cache_website: Whether to cache the website content (optional)
+        depth: Maximum crawling depth (optional)
+        max_pages: Maximum number of pages to crawl (optional)
+        same_domain_only: Whether to crawl only within the same domain (optional)
+        markdown_only: Whether to return only markdown content without AI processing (optional)
+
+    Returns:
+        Dictionary containing the crawl results
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScapeGraph client not initialized. Please provide an API key."}
+
+    try:
+        return scrapegraph_client.crawl(
+            url=url,
+            prompt=prompt,
+            cache_website=cache_website,
+            depth=depth,
+            max_pages=max_pages,
+            same_domain_only=same_domain_only,
+            markdown_only=markdown_only
+        )
     except Exception as e:
         return {"error": str(e)}
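For reviewers, a minimal usage sketch of the parameters patch 1 introduces. The class name ScapeGraphClient, its api_key constructor argument, and the SGAI_API_KEY variable are assumptions made for illustration; the hunks above only show the class's methods and the MCP tool wrappers.

    # Sketch only: the import path and class name are assumed, not shown in this diff.
    import os
    from scrapegraph_mcp.server import ScapeGraphClient

    client = ScapeGraphClient(api_key=os.environ["SGAI_API_KEY"])

    # New number_of_scrolls parameter: scroll three times before AI extraction.
    products = client.smartscraper(
        user_prompt="List each product name and price",
        website_url="https://example.com/shop",
        number_of_scrolls=3,
    )

    # New num_results parameter: search five sites instead of the default three
    # (the docstring prices the default at 3 websites = 30 credits, so 10 per site).
    news = client.searchscraper(
        user_prompt="Latest LLM benchmark results",
        num_results=5,
    )

    client.close()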
From 16808d0a774578920a9028ed5bd1f9b96960757d Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra
Date: Thu, 7 Aug 2025 12:16:47 +0200
Subject: [PATCH 2/2] Update server.py

---
 src/scrapegraph_mcp/server.py | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 55 insertions(+), 10 deletions(-)

diff --git a/src/scrapegraph_mcp/server.py b/src/scrapegraph_mcp/server.py
index 68325c6..42e638c 100644
--- a/src/scrapegraph_mcp/server.py
+++ b/src/scrapegraph_mcp/server.py
@@ -5,7 +5,8 @@
 - markdownify: Convert any webpage into clean, formatted markdown
 - smartscraper: Extract structured data from any webpage using AI
 - searchscraper: Perform AI-powered web searches with structured results
-- crawl: Perform intelligent web crawling with AI-powered data extraction
+- crawl_requester: Initiate intelligent web crawling requests (step 1)
+- crawl_fetcher: Fetch results from crawling requests (step 2)
 """

 import os
@@ -125,7 +126,7 @@ def searchscraper(self, user_prompt: str, num_results: int = None, number_of_scr

         return response.json()

-    def crawl(
+    def crawl_requester(
         self,
         url: str,
         prompt: str = None,
@@ -136,7 +137,7 @@ def crawl(
         markdown_only: bool = None
     ) -> Dict[str, Any]:
         """
-        Perform intelligent web crawling with AI-powered data extraction.
+        Initiate a web crawling request and get a request ID.

         Args:
             url: Starting URL to crawl
@@ -148,9 +149,9 @@ def crawl(
             markdown_only: Whether to return only markdown content without AI processing (optional)

         Returns:
-            Dictionary containing the crawl results
+            Dictionary containing the request ID and status
         """
-        endpoint = f"{self.BASE_URL}/crawl"
+        endpoint = f"{self.BASE_URL}/crawl/requester"
         data = {
             "url": url
         }
@@ -177,6 +178,29 @@ def crawl(

         return response.json()

+    def crawl_fetcher(self, request_id: str) -> Dict[str, Any]:
+        """
+        Fetch the results of a crawling request using the request ID.
+
+        Args:
+            request_id: The request ID returned by crawl_requester
+
+        Returns:
+            Dictionary containing the crawl results or status
+        """
+        endpoint = f"{self.BASE_URL}/crawl/fetcher"
+        data = {
+            "request_id": request_id
+        }
+
+        response = self.client.post(endpoint, headers=self.headers, json=data)
+
+        if response.status_code != 200:
+            error_msg = f"Error {response.status_code}: {response.text}"
+            raise Exception(error_msg)
+
+        return response.json()
+
     def close(self) -> None:
         """Close the HTTP client."""
         self.client.close()
@@ -267,9 +291,9 @@ def searchscraper(
         return {"error": str(e)}


-# Add tool for crawl (smartcrawler)
+# Add tool for crawl requester (smartcrawler step 1)
 @mcp.tool()
-def crawl(
+def crawl_requester(
     url: str,
     prompt: str = None,
     cache_website: bool = None,
@@ -279,7 +303,7 @@ def crawl(
     markdown_only: bool = None
 ) -> Dict[str, Any]:
     """
-    Perform intelligent web crawling with AI-powered data extraction.
+    Initiate a web crawling request and get a request ID.

     Args:
         url: Starting URL to crawl
@@ -291,13 +315,13 @@ def crawl(
     markdown_only: Whether to return only markdown content without AI processing (optional)

     Returns:
-        Dictionary containing the crawl results
+        Dictionary containing the request ID and status
     """
     if scrapegraph_client is None:
         return {"error": "ScapeGraph client not initialized. Please provide an API key."}

     try:
-        return scrapegraph_client.crawl(
+        return scrapegraph_client.crawl_requester(
             url=url,
             prompt=prompt,
             cache_website=cache_website,
@@ -310,6 +334,27 @@ def crawl(
         return {"error": str(e)}


+# Add tool for crawl fetcher (smartcrawler step 2)
+@mcp.tool()
+def crawl_fetcher(request_id: str) -> Dict[str, Any]:
+    """
+    Fetch the results of a crawling request using the request ID.
+
+    Args:
+        request_id: The request ID returned by crawl_requester
+
+    Returns:
+        Dictionary containing the crawl results or status
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScapeGraph client not initialized. Please provide an API key."}
+
+    try:
+        return scrapegraph_client.crawl_fetcher(request_id)
+    except Exception as e:
+        return {"error": str(e)}
+
+
 def main() -> None:
     """Run the ScapeGraph MCP server."""
     print("Starting ScapeGraph MCP server!")
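Taken together, patch 2 turns the one-shot crawl of patch 1 into an asynchronous two-step flow. A sketch of how a caller might drive it end to end follows; the "request_id" and "status" response fields and the in-progress status values are assumptions, since the patches only establish that crawl_requester returns a request ID which crawl_fetcher consumes.

    import time

    # Step 1: start the crawl and keep the returned ID.
    req = crawl_requester(
        url="https://example.com",
        prompt="Extract every article title",
        depth=2,
        max_pages=10,
        same_domain_only=True,
    )
    request_id = req["request_id"]  # assumed response field name

    # Step 2: poll the fetcher until the crawl leaves its in-progress state.
    while True:
        result = crawl_fetcher(request_id)
        if result.get("status") not in ("pending", "processing"):  # assumed values
            break
        time.sleep(5)

    print(result)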