1+ """
2+ Example demonstrating how to use the SmartScraper API with cookies (Async).
3+
4+ This example shows how to:
5+ 1. Set up the API request with cookies for authentication
6+ 2. Use cookies with infinite scrolling
7+ 3. Define a Pydantic model for structured output
8+ 4. Make the API call and handle the response
9+ 5. Process the extracted data
10+
11+ Requirements:
12+ - Python 3.7+
13+ - scrapegraph-py
14+ - A .env file with your SGAI_API_KEY
15+
16+ Example .env file:
17+ SGAI_API_KEY=your_api_key_here
18+ """
19+
20+ import asyncio
21+ import json
22+ import os
23+ from typing import Dict , Optional
24+
25+ from dotenv import load_dotenv
26+ from pydantic import BaseModel , Field
27+
28+ from scrapegraph_py import AsyncClient
29+
# Pull SGAI_API_KEY (and any other settings) from a local .env file into os.environ.
load_dotenv()
32+
33+
# Define the data models for structured output
class CookieInfo(BaseModel):
    """Structured-output schema: a flat mapping of cookie names to values."""

    # Key/value pairs exactly as reported by the scraped page.
    cookies: Dict[str, str] = Field(description="Dictionary of cookie key-value pairs")
39+
40+
async def _run_example(
    client: AsyncClient,
    title: str,
    user_prompt: str,
    cookies: Dict[str, str],
    result_label: str,
    website_url: str = "https://httpbin.org/cookies",
    **scraper_kwargs,
) -> None:
    """Run one smartscraper request with cookies and print the JSON result.

    Args:
        client: An already-entered AsyncClient.
        title: Banner text printed above the example.
        user_prompt: Extraction instruction passed to the API.
        cookies: Cookie key/value pairs sent with the request.
        result_label: Heading printed above the extracted data.
        website_url: Target page; defaults to httpbin's cookie echo endpoint.
        **scraper_kwargs: Extra smartscraper options, e.g. number_of_scrolls
            or total_pages.
    """
    print("=" * 60)
    print(title)
    print("=" * 60)
    try:
        response = await client.smartscraper(
            website_url=website_url,
            user_prompt=user_prompt,
            cookies=cookies,
            output_schema=CookieInfo,
            **scraper_kwargs,
        )
        print(f"\n{result_label}:")
        print(json.dumps(response, indent=2))
    except Exception as e:
        # Example code: surface any API/network failure without aborting the
        # remaining examples.
        print(f"Error occurred: {e}")


async def main():
    """Example usage of the cookies scraper.

    Runs three smartscraper calls against httpbin.org/cookies: basic cookies,
    cookies with infinite scrolling, and cookies with pagination. Requires
    SGAI_API_KEY in the environment (typically via a .env file).
    """
    # Fail fast with setup instructions if the API key is missing.
    if not os.getenv("SGAI_API_KEY"):
        print("Error: SGAI_API_KEY not found in .env file")
        print("Please create a .env file with your API key:")
        print("SGAI_API_KEY=your_api_key_here")
        return

    # Initialize the async client; the context manager closes it on exit.
    async with AsyncClient.from_env() as client:
        # Example 1: Basic cookies example (httpbin.org/cookies)
        await _run_example(
            client,
            "EXAMPLE 1: Basic Cookies Example",
            "Extract all cookies info",
            {"cookies_key": "cookies_value"},
            "Extracted Cookie Information",
        )

        # Example 2: Cookies combined with infinite scrolling.
        print()
        await _run_example(
            client,
            "EXAMPLE 2: Cookies with Infinite Scrolling",
            "Extract all cookies and scroll information",
            {"session_id": "abc123", "user_token": "xyz789"},
            "Extracted Cookie Information with Scrolling",
            number_of_scrolls=3,
        )

        # Example 3: Cookies combined with multi-page pagination.
        print()
        await _run_example(
            client,
            "EXAMPLE 3: Cookies with Pagination",
            "Extract all cookies from multiple pages",
            {"auth_token": "secret123", "preferences": "dark_mode"},
            "Extracted Cookie Information with Pagination",
            total_pages=3,
        )
128+
129+
# Script entry point: drive the async examples on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())