1010""" 
1111
1212import  os 
13- from  typing  import  Any , Dict 
13+ import  json 
14+ from  typing  import  Any , Dict , Optional , List , Union 
1415
1516import  httpx 
1617from  fastmcp  import  FastMCP 
@@ -33,7 +34,7 @@ def __init__(self, api_key: str):
             "SGAI-APIKEY": api_key,
             "Content-Type": "application/json"
         }
-        self.client = httpx.Client(timeout=60.0)
+        self.client = httpx.Client(timeout=httpx.Timeout(120.0))
 
     def markdownify(self, website_url: str) -> Dict[str, Any]:
         """
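
For context, a minimal sketch of what the new timeout configuration does; the granular variant below is illustrative and not part of this change. A single number passed to httpx.Timeout applies to the connect, read, write, and pool phases alike, while per-phase limits can be set explicitly:

    import httpx

    # What the diff configures: 120 s for every phase of the request
    uniform = httpx.Timeout(120.0)

    # Hypothetical granular alternative: fail fast on unreachable hosts,
    # but allow long reads for slow, JS-heavy pages
    granular = httpx.Timeout(connect=10.0, read=120.0, write=30.0, pool=10.0)

    client = httpx.Client(timeout=uniform)
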
@@ -126,6 +127,85 @@ def searchscraper(self, user_prompt: str, num_results: int = None, number_of_scr
 
         return response.json()
 
+    def scrape(self, website_url: str, render_heavy_js: Optional[bool] = None) -> Dict[str, Any]:
+        """
+        Basic scrape endpoint to fetch page content.
+
+        Args:
+            website_url: URL to scrape
+            render_heavy_js: Whether to render heavy JS (optional)
+
+        Returns:
+            Dictionary containing the scraped result
+        """
+        url = f"{self.BASE_URL}/scrape"
+        payload: Dict[str, Any] = {"website_url": website_url}
+        if render_heavy_js is not None:
+            payload["render_heavy_js"] = render_heavy_js
+
+        response = self.client.post(url, headers=self.headers, json=payload)
+        response.raise_for_status()
+        return response.json()
+
+    def sitemap(self, website_url: str) -> Dict[str, Any]:
+        """
+        Extract sitemap for a given website.
+
+        Args:
+            website_url: Base website URL
+
+        Returns:
+            Dictionary containing sitemap URLs/structure
+        """
+        url = f"{self.BASE_URL}/sitemap"
+        payload: Dict[str, Any] = {"website_url": website_url}
+
+        response = self.client.post(url, headers=self.headers, json=payload)
+        response.raise_for_status()
+        return response.json()
+
+    def agentic_scrapper(
+        self,
+        url: str,
+        user_prompt: Optional[str] = None,
+        output_schema: Optional[Dict[str, Any]] = None,
+        steps: Optional[List[str]] = None,
+        ai_extraction: Optional[bool] = None,
+        persistent_session: Optional[bool] = None,
+        timeout_seconds: Optional[float] = None,
+    ) -> Dict[str, Any]:
+        """
+        Run the Agentic Scraper workflow (no live session/browser interaction).
+
+        Args:
+            url: Target website URL
+            user_prompt: Instructions for what to do/extract (optional)
+            output_schema: Desired structured output schema (optional)
+            steps: High-level steps/instructions for the agent (optional)
+            ai_extraction: Whether to enable AI extraction mode (optional)
+            persistent_session: Whether to keep session alive between steps (optional)
+            timeout_seconds: Per-request timeout override in seconds (optional)
+        """
+        endpoint = f"{self.BASE_URL}/agentic-scrapper"
+        payload: Dict[str, Any] = {"url": url}
+        if user_prompt is not None:
+            payload["user_prompt"] = user_prompt
+        if output_schema is not None:
+            payload["output_schema"] = output_schema
+        if steps is not None:
+            payload["steps"] = steps
+        if ai_extraction is not None:
+            payload["ai_extraction"] = ai_extraction
+        if persistent_session is not None:
+            payload["persistent_session"] = persistent_session
+
+        if timeout_seconds is not None:
+            response = self.client.post(endpoint, headers=self.headers, json=payload, timeout=timeout_seconds)
+        else:
+            response = self.client.post(endpoint, headers=self.headers, json=payload)
+        response.raise_for_status()
+        return response.json()
+
     def smartcrawler_initiate(
         self,
         url: str,
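
For orientation, a minimal usage sketch of the three new client methods. The class name, environment variable, and URLs are assumptions for illustration; the diff only shows __init__(self, api_key: str):

    import os

    # Hypothetical construction of the client wrapper
    client = ScrapeGraphClient(os.environ["SGAI_API_KEY"])

    # Plain fetch; enable heavy JS rendering only when the page needs it
    page = client.scrape("https://example.com", render_heavy_js=False)

    # Enumerate the site's sitemap URLs
    site = client.sitemap("https://example.com")

    # Agentic workflow with explicit steps and a longer per-request timeout
    result = client.agentic_scrapper(
        url="https://example.com",
        user_prompt="Collect all product names",
        steps=["open the catalog page", "list every product name"],
        timeout_seconds=180.0,
    )
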
@@ -371,6 +451,110 @@ def smartcrawler_fetch_results(request_id: str) -> Dict[str, Any]:
         return {"error": str(e)}
 
 
+# Add tool for basic scrape
+@mcp.tool()
+def scrape(website_url: str, render_heavy_js: Optional[bool] = None) -> Dict[str, Any]:
+    """
+    Fetch page content for a URL.
+
+    Args:
+        website_url: URL to scrape
+        render_heavy_js: Whether to render heavy JS (optional)
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScrapeGraph client not initialized. Please provide an API key."}
+
+    try:
+        return scrapegraph_client.scrape(website_url=website_url, render_heavy_js=render_heavy_js)
+    except httpx.HTTPError as http_err:
+        return {"error": str(http_err)}
+    except ValueError as val_err:
+        return {"error": str(val_err)}
+
+
+# Add tool for sitemap extraction
+@mcp.tool()
+def sitemap(website_url: str) -> Dict[str, Any]:
+    """
+    Extract sitemap for a website.
+
+    Args:
+        website_url: Base website URL
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScrapeGraph client not initialized. Please provide an API key."}
+
+    try:
+        return scrapegraph_client.sitemap(website_url=website_url)
+    except httpx.HTTPError as http_err:
+        return {"error": str(http_err)}
+    except ValueError as val_err:
+        return {"error": str(val_err)}
+
+
+# Add tool for Agentic Scraper (no live session/browser interaction)
+@mcp.tool()
+def agentic_scrapper(
+    url: str,
+    user_prompt: Optional[str] = None,
+    output_schema: Optional[Union[str, Dict[str, Any]]] = None,
+    steps: Optional[Union[str, List[str]]] = None,
+    ai_extraction: Optional[bool] = None,
+    persistent_session: Optional[bool] = None,
+    timeout_seconds: Optional[float] = None,
+) -> Dict[str, Any]:
+    """
+    Run the Agentic Scraper workflow. Accepts flexible input forms for steps and schema.
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScrapeGraph client not initialized. Please provide an API key."}
+
+    # Normalize inputs
+    normalized_steps: Optional[List[str]] = None
+    if isinstance(steps, list):
+        normalized_steps = steps
+    elif isinstance(steps, str):
+        parsed_steps: Optional[Any] = None
+        try:
+            parsed_steps = json.loads(steps)
+        except json.JSONDecodeError:
+            parsed_steps = None
+        if isinstance(parsed_steps, list):
+            normalized_steps = parsed_steps
+        else:
+            normalized_steps = [steps]
+
+    normalized_schema: Optional[Dict[str, Any]] = None
+    if isinstance(output_schema, dict):
+        normalized_schema = output_schema
+    elif isinstance(output_schema, str):
+        try:
+            parsed_schema = json.loads(output_schema)
+            if isinstance(parsed_schema, dict):
+                normalized_schema = parsed_schema
+            else:
+                return {"error": "output_schema must be a JSON object"}
+        except json.JSONDecodeError as e:
+            return {"error": f"Invalid JSON for output_schema: {str(e)}"}
+
+    try:
+        return scrapegraph_client.agentic_scrapper(
+            url=url,
+            user_prompt=user_prompt,
+            output_schema=normalized_schema,
+            steps=normalized_steps,
+            ai_extraction=ai_extraction,
+            persistent_session=persistent_session,
+            timeout_seconds=timeout_seconds,
+        )
+    except httpx.TimeoutException as timeout_err:
+        return {"error": f"Request timed out: {str(timeout_err)}"}
+    except httpx.HTTPError as http_err:
+        return {"error": str(http_err)}
+    except ValueError as val_err:
+        return {"error": str(val_err)}
+
+
 def main() -> None:
     """Run the ScrapeGraph MCP server."""
     print("Starting ScrapeGraph MCP server!")
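
Because of the input normalization above, the agentic_scrapper tool accepts either native Python values or JSON strings for steps and output_schema. A minimal sketch of two equivalent calls (URL, steps, and schema values are illustrative):

    # Native forms
    agentic_scrapper(
        url="https://example.com",
        steps=["open the catalog page", "extract every product name"],
        output_schema={"type": "object", "properties": {"products": {"type": "array"}}},
    )

    # JSON-string forms, as an MCP client might send them; both calls
    # normalize to the same payload before the HTTP request is made
    agentic_scrapper(
        url="https://example.com",
        steps='["open the catalog page", "extract every product name"]',
        output_schema='{"type": "object", "properties": {"products": {"type": "array"}}}',
    )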