 - markdownify: Convert any webpage into clean, formatted markdown
 - smartscraper: Extract structured data from any webpage using AI
 - searchscraper: Perform AI-powered web searches with structured results
+- crawl_requester: Initiate intelligent web crawling requests (step 1)
+- crawl_fetcher: Fetch results from crawling requests (step 2)
 """
 
 import os
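
Before the hunks below, a minimal orientation sketch (not part of the diff) of how the client class in this file is used. The constructor is not shown in this excerpt, so the `api_key` keyword is an assumption inferred from how `self.headers` and `self.client` appear in the methods:

```python
# Minimal usage sketch, not part of the diff. ScrapeGraphClient's constructor
# is not shown in this excerpt; the api_key keyword argument is an assumption.
client = ScrapeGraphClient(api_key="sgai-...")    # hypothetical constructor call
page = client.markdownify("https://example.com")  # returns a dict, per the methods below
client.close()                                    # closes the HTTP client
```
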
@@ -56,22 +58,32 @@ def markdownify(self, website_url: str) -> Dict[str, Any]:
 
         return response.json()
 
-    def smartscraper(self, user_prompt: str, website_url: str) -> Dict[str, Any]:
+    def smartscraper(self, user_prompt: str, website_url: str, number_of_scrolls: int = None, markdown_only: bool = None) -> Dict[str, Any]:
         """
         Extract structured data from a webpage using AI.
 
         Args:
             user_prompt: Instructions for what data to extract
             website_url: URL of the webpage to scrape
+            number_of_scrolls: Number of infinite scrolls to perform (optional)
+            markdown_only: Whether to return only markdown content without AI processing (optional)
 
         Returns:
-            Dictionary containing the extracted data
+            Dictionary containing the extracted data or markdown content
         """
         url = f"{self.BASE_URL}/smartscraper"
         data = {
             "user_prompt": user_prompt,
             "website_url": website_url
         }
+
+        # Add number_of_scrolls to the request if provided
+        if number_of_scrolls is not None:
+            data["number_of_scrolls"] = number_of_scrolls
+
+        # Add markdown_only to the request if provided
+        if markdown_only is not None:
+            data["markdown_only"] = markdown_only
 
         response = self.client.post(url, headers=self.headers, json=data)
 
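
Both new parameters are optional and are only added to the request payload when set, so existing calls are unaffected. A hedged usage sketch (the prompt and URL are illustrative):

```python
# Both keyword arguments are optional; omitting them sends the same request
# body as before this change.
result = client.smartscraper(
    user_prompt="List the product names and prices",
    website_url="https://example.com/shop",  # illustrative URL
    number_of_scrolls=3,   # perform 3 infinite scrolls before extraction
    markdown_only=True,    # skip AI processing and return markdown only
)
```
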
@@ -81,12 +93,14 @@ def smartscraper(self, user_prompt: str, website_url: str) -> Dict[str, Any]:
 
         return response.json()
 
-    def searchscraper(self, user_prompt: str) -> Dict[str, Any]:
+    def searchscraper(self, user_prompt: str, num_results: int = None, number_of_scrolls: int = None) -> Dict[str, Any]:
         """
         Perform AI-powered web searches with structured results.
 
         Args:
             user_prompt: Search query or instructions
+            num_results: Number of websites to search (optional, default: 3 websites = 30 credits)
+            number_of_scrolls: Number of infinite scrolls to perform on each website (optional)
 
         Returns:
             Dictionary containing search results and reference URLs
@@ -95,6 +109,14 @@ def searchscraper(self, user_prompt: str) -> Dict[str, Any]:
         data = {
             "user_prompt": user_prompt
         }
+
+        # Add num_results to the request if provided
+        if num_results is not None:
+            data["num_results"] = num_results
+
+        # Add number_of_scrolls to the request if provided
+        if number_of_scrolls is not None:
+            data["number_of_scrolls"] = number_of_scrolls
 
         response = self.client.post(url, headers=self.headers, json=data)
 
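
A corresponding sketch for the extended searchscraper call. Per the docstring, the default of 3 websites costs 30 credits, which implies roughly 10 credits per website searched:

```python
# num_results and number_of_scrolls are optional; the documented default of
# 3 websites = 30 credits implies ~10 credits per website.
results = client.searchscraper(
    user_prompt="Latest stable Python release and its release date",
    num_results=5,        # search 5 websites (~50 credits by the same ratio)
    number_of_scrolls=1,  # one infinite scroll on each visited page
)
```
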
@@ -104,6 +126,81 @@ def searchscraper(self, user_prompt: str) -> Dict[str, Any]:
 
         return response.json()
 
+    def crawl_requester(
+        self,
+        url: str,
+        prompt: str = None,
+        cache_website: bool = None,
+        depth: int = None,
+        max_pages: int = None,
+        same_domain_only: bool = None,
+        markdown_only: bool = None
+    ) -> Dict[str, Any]:
+        """
+        Initiate a web crawling request and get a request ID.
+
+        Args:
+            url: Starting URL to crawl
+            prompt: AI prompt for data extraction (optional, if not provided returns markdown only)
+            cache_website: Whether to cache the website content (optional)
+            depth: Maximum crawling depth (optional)
+            max_pages: Maximum number of pages to crawl (optional)
+            same_domain_only: Whether to crawl only within the same domain (optional)
+            markdown_only: Whether to return only markdown content without AI processing (optional)
+
+        Returns:
+            Dictionary containing the request ID and status
+        """
+        endpoint = f"{self.BASE_URL}/crawl/requester"
+        data = {
+            "url": url
+        }
+
+        # Add optional parameters if provided
+        if prompt is not None:
+            data["prompt"] = prompt
+        if cache_website is not None:
+            data["cache_website"] = cache_website
+        if depth is not None:
+            data["depth"] = depth
+        if max_pages is not None:
+            data["max_pages"] = max_pages
+        if same_domain_only is not None:
+            data["same_domain_only"] = same_domain_only
+        if markdown_only is not None:
+            data["markdown_only"] = markdown_only
+
+        response = self.client.post(endpoint, headers=self.headers, json=data)
+
+        if response.status_code != 200:
+            error_msg = f"Error {response.status_code}: {response.text}"
+            raise Exception(error_msg)
+
+        return response.json()
+
+    def crawl_fetcher(self, request_id: str) -> Dict[str, Any]:
+        """
+        Fetch the results of a crawling request using the request ID.
+
+        Args:
+            request_id: The request ID returned by crawl_requester
+
+        Returns:
+            Dictionary containing the crawl results or status
+        """
+        endpoint = f"{self.BASE_URL}/crawl/fetcher"
+        data = {
+            "request_id": request_id
+        }
+
+        response = self.client.post(endpoint, headers=self.headers, json=data)
+
+        if response.status_code != 200:
+            error_msg = f"Error {response.status_code}: {response.text}"
+            raise Exception(error_msg)
+
+        return response.json()
+
     def close(self) -> None:
         """Close the HTTP client."""
         self.client.close()
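
The two crawl methods are meant to be chained: crawl_requester returns an ID, and crawl_fetcher is polled with it until results are ready. A minimal polling sketch, assuming the response carries a `request_id` field and a pending-style status value (the exact response field names and status strings are not shown in this diff):

```python
import time

# Step 1: start the crawl and get a request ID.
req = client.crawl_requester(
    url="https://example.com",
    prompt="Collect page titles and summaries",  # omit for markdown-only output
    depth=2,
    max_pages=10,
    same_domain_only=True,
)
request_id = req["request_id"]  # assumed field name, per the docstring's wording

# Step 2: poll until the crawl finishes. The "pending" status value is an
# assumption; it is not documented in this diff.
while True:
    result = client.crawl_fetcher(request_id)
    if result.get("status") != "pending":
        break
    time.sleep(5)
```
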
@@ -142,37 +239,45 @@ def markdownify(website_url: str) -> Dict[str, Any]:
 @mcp.tool()
 def smartscraper(
     user_prompt: str,
-    website_url: str
+    website_url: str,
+    number_of_scrolls: int = None,
+    markdown_only: bool = None
 ) -> Dict[str, Any]:
     """
     Extract structured data from a webpage using AI.
 
     Args:
         user_prompt: Instructions for what data to extract
         website_url: URL of the webpage to scrape
+        number_of_scrolls: Number of infinite scrolls to perform (optional)
+        markdown_only: Whether to return only markdown content without AI processing (optional)
 
     Returns:
-        Dictionary containing the extracted data
+        Dictionary containing the extracted data or markdown content
     """
     if scrapegraph_client is None:
         return {"error": "ScapeGraph client not initialized. Please provide an API key."}
 
     try:
-        return scrapegraph_client.smartscraper(user_prompt, website_url)
+        return scrapegraph_client.smartscraper(user_prompt, website_url, number_of_scrolls, markdown_only)
     except Exception as e:
         return {"error": str(e)}
 
 
 # Add tool for searchscraper
 @mcp.tool()
 def searchscraper(
-    user_prompt: str
+    user_prompt: str,
+    num_results: int = None,
+    number_of_scrolls: int = None
 ) -> Dict[str, Any]:
     """
     Perform AI-powered web searches with structured results.
 
     Args:
         user_prompt: Search query or instructions
+        num_results: Number of websites to search (optional, default: 3 websites = 30 credits)
+        number_of_scrolls: Number of infinite scrolls to perform on each website (optional)
 
     Returns:
         Dictionary containing search results and reference URLs
@@ -181,7 +286,71 @@ def searchscraper(
         return {"error": "ScapeGraph client not initialized. Please provide an API key."}
 
     try:
-        return scrapegraph_client.searchscraper(user_prompt)
+        return scrapegraph_client.searchscraper(user_prompt, num_results, number_of_scrolls)
+    except Exception as e:
+        return {"error": str(e)}
+
+
+# Add tool for crawl requester (smartcrawler step 1)
+@mcp.tool()
+def crawl_requester(
+    url: str,
+    prompt: str = None,
+    cache_website: bool = None,
+    depth: int = None,
+    max_pages: int = None,
+    same_domain_only: bool = None,
+    markdown_only: bool = None
+) -> Dict[str, Any]:
+    """
+    Initiate a web crawling request and get a request ID.
+
+    Args:
+        url: Starting URL to crawl
+        prompt: AI prompt for data extraction (optional, if not provided returns markdown only)
+        cache_website: Whether to cache the website content (optional)
+        depth: Maximum crawling depth (optional)
+        max_pages: Maximum number of pages to crawl (optional)
+        same_domain_only: Whether to crawl only within the same domain (optional)
+        markdown_only: Whether to return only markdown content without AI processing (optional)
+
+    Returns:
+        Dictionary containing the request ID and status
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScapeGraph client not initialized. Please provide an API key."}
+
+    try:
+        return scrapegraph_client.crawl_requester(
+            url=url,
+            prompt=prompt,
+            cache_website=cache_website,
+            depth=depth,
+            max_pages=max_pages,
+            same_domain_only=same_domain_only,
+            markdown_only=markdown_only
+        )
+    except Exception as e:
+        return {"error": str(e)}
+
+
+# Add tool for crawl fetcher (smartcrawler step 2)
+@mcp.tool()
+def crawl_fetcher(request_id: str) -> Dict[str, Any]:
+    """
+    Fetch the results of a crawling request using the request ID.
+
+    Args:
+        request_id: The request ID returned by crawl_requester
+
+    Returns:
+        Dictionary containing the crawl results or status
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScapeGraph client not initialized. Please provide an API key."}
+
+    try:
+        return scrapegraph_client.crawl_fetcher(request_id)
     except Exception as e:
         return {"error": str(e)}
 
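
Note the error-handling convention of the tool layer: the ScrapeGraphClient methods raise on non-200 responses, while every @mcp.tool() wrapper catches the exception and converts it to an {"error": ...} dict, so MCP callers always receive a structured payload. A sketch of how a caller might branch on that, using the two-step crawl tools (the `request_id` field name is assumed, and the URL is illustrative):

```python
# The tool wrappers never raise; they return {"error": ...} instead, so a
# caller branches on the payload rather than catching exceptions.
started = crawl_requester(url="https://example.com", markdown_only=True)
if "error" in started:
    print("crawl failed to start:", started["error"])
else:
    fetched = crawl_fetcher(started["request_id"])  # assumed response field name
    print(fetched)
```
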