Skip to content

Commit 8984f4b

Browse files
committed
feat: add docstring
1 parent d0a10e5 commit 8984f4b

File tree

17 files changed

+617
-22
lines changed

17 files changed

+617
-22
lines changed

scrapegraph-py/scrapegraph_py/__init__.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,51 @@
1+
"""
2+
ScrapeGraphAI Python SDK
3+
4+
A comprehensive Python SDK for the ScrapeGraphAI API, providing both synchronous
5+
and asynchronous clients for all API endpoints.
6+
7+
Main Features:
8+
- SmartScraper: AI-powered web scraping with structured data extraction
9+
- SearchScraper: Web research across multiple sources
10+
- Agentic Scraper: Automated browser interactions and form filling
11+
- Crawl: Website crawling with AI extraction or markdown conversion
12+
- Markdownify: Convert web pages to clean markdown
13+
- Schema Generation: AI-assisted schema creation for data extraction
14+
- Scheduled Jobs: Automate recurring scraping tasks
15+
16+
Quick Start:
17+
>>> from scrapegraph_py import Client
18+
>>>
19+
>>> # Initialize client from environment variables
20+
>>> client = Client.from_env()
21+
>>>
22+
>>> # Basic scraping
23+
>>> result = client.smartscraper(
24+
... website_url="https://example.com",
25+
... user_prompt="Extract all product information"
26+
... )
27+
>>>
28+
>>> # With context manager
29+
>>> with Client.from_env() as client:
30+
... result = client.scrape(website_url="https://example.com")
31+
32+
Async Usage:
33+
>>> import asyncio
34+
>>> from scrapegraph_py import AsyncClient
35+
>>>
36+
>>> async def main():
37+
... async with AsyncClient.from_env() as client:
38+
... result = await client.smartscraper(
39+
... website_url="https://example.com",
40+
... user_prompt="Extract products"
41+
... )
42+
>>>
43+
>>> asyncio.run(main())
44+
45+
For more information visit: https://scrapegraphai.com
46+
Documentation: https://docs.scrapegraphai.com
47+
"""
48+
149
from .async_client import AsyncClient
250
from .client import Client
351

scrapegraph-py/scrapegraph_py/async_client.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,38 @@
1+
"""
2+
Asynchronous HTTP client for the ScrapeGraphAI API.
3+
4+
This module provides an asynchronous client for interacting with all ScrapeGraphAI
5+
API endpoints including smartscraper, searchscraper, crawl, agentic scraper,
6+
markdownify, schema generation, scheduled jobs, and utility functions.
7+
8+
The AsyncClient class supports:
9+
- API key authentication
10+
- SSL verification configuration
11+
- Request timeout configuration
12+
- Automatic retry logic with exponential backoff
13+
- Mock mode for testing
14+
- Async context manager support for proper resource cleanup
15+
- Concurrent requests using asyncio
16+
17+
Example:
18+
Basic usage with environment variables:
19+
>>> import asyncio
20+
>>> from scrapegraph_py import AsyncClient
21+
>>> async def main():
22+
... client = AsyncClient.from_env()
23+
... result = await client.smartscraper(
24+
... website_url="https://example.com",
25+
... user_prompt="Extract product information"
26+
... )
27+
... await client.close()
28+
>>> asyncio.run(main())
29+
30+
Using async context manager:
31+
>>> async def main():
32+
... async with AsyncClient(api_key="sgai-...") as client:
33+
... result = await client.scrape(website_url="https://example.com")
34+
>>> asyncio.run(main())
35+
"""
136
import asyncio
237
from typing import Any, Dict, Optional, Callable
338

@@ -44,6 +79,30 @@
4479

4580

4681
class AsyncClient:
82+
"""
83+
Asynchronous client for the ScrapeGraphAI API.
84+
85+
This class provides asynchronous methods for all ScrapeGraphAI API endpoints.
86+
It handles authentication, request management, error handling, and supports
87+
mock mode for testing. Uses aiohttp for efficient async HTTP requests.
88+
89+
Attributes:
90+
api_key (str): The API key for authentication
91+
headers (dict): Default headers including API key
92+
timeout (ClientTimeout): Request timeout configuration
93+
max_retries (int): Maximum number of retry attempts
94+
retry_delay (float): Base delay between retries in seconds
95+
mock (bool): Whether mock mode is enabled
96+
session (ClientSession): Aiohttp session for connection pooling
97+
98+
Example:
99+
>>> async def example():
100+
... async with AsyncClient.from_env() as client:
101+
... result = await client.smartscraper(
102+
... website_url="https://example.com",
103+
... user_prompt="Extract all products"
104+
... )
105+
"""
47106
@classmethod
48107
def from_env(
49108
cls,
@@ -144,7 +203,25 @@ def __init__(
144203
logger.info("✅ AsyncClient initialized successfully")
145204

146205
async def _make_request(self, method: str, url: str, **kwargs) -> Any:
147-
"""Make HTTP request with retry logic."""
206+
"""
207+
Make asynchronous HTTP request with retry logic and error handling.
208+
209+
Args:
210+
method: HTTP method (GET, POST, etc.)
211+
url: Full URL for the request
212+
**kwargs: Additional arguments to pass to aiohttp
213+
214+
Returns:
215+
Parsed JSON response data
216+
217+
Raises:
218+
APIError: If the API returns an error response
219+
ConnectionError: If unable to connect after all retries
220+
221+
Note:
222+
In mock mode, this method returns deterministic responses without
223+
making actual HTTP requests.
224+
"""
148225
# Short-circuit when mock mode is enabled
149226
if getattr(self, "mock", False):
150227
return self._mock_response(method, url, **kwargs)

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,31 @@
1-
# Client implementation goes here
1+
"""
2+
Synchronous HTTP client for the ScrapeGraphAI API.
3+
4+
This module provides a synchronous client for interacting with all ScrapeGraphAI
5+
API endpoints including smartscraper, searchscraper, crawl, agentic scraper,
6+
markdownify, schema generation, scheduled jobs, and utility functions.
7+
8+
The Client class supports:
9+
- API key authentication
10+
- SSL verification configuration
11+
- Request timeout configuration
12+
- Automatic retry logic with exponential backoff
13+
- Mock mode for testing
14+
- Context manager support for proper resource cleanup
15+
16+
Example:
17+
Basic usage with environment variables:
18+
>>> from scrapegraph_py import Client
19+
>>> client = Client.from_env()
20+
>>> result = client.smartscraper(
21+
... website_url="https://example.com",
22+
... user_prompt="Extract product information"
23+
... )
24+
25+
Using context manager:
26+
>>> with Client(api_key="sgai-...") as client:
27+
... result = client.scrape(website_url="https://example.com")
28+
"""
229
import uuid as _uuid
330
from typing import Any, Callable, Dict, Optional
431
from urllib.parse import urlparse
@@ -50,6 +77,29 @@
5077

5178

5279
class Client:
80+
"""
81+
Synchronous client for the ScrapeGraphAI API.
82+
83+
This class provides synchronous methods for all ScrapeGraphAI API endpoints.
84+
It handles authentication, request management, error handling, and supports
85+
mock mode for testing.
86+
87+
Attributes:
88+
api_key (str): The API key for authentication
89+
headers (dict): Default headers including API key
90+
timeout (Optional[float]): Request timeout in seconds
91+
max_retries (int): Maximum number of retry attempts
92+
retry_delay (float): Delay between retries in seconds
93+
mock (bool): Whether mock mode is enabled
94+
session (requests.Session): HTTP session for connection pooling
95+
96+
Example:
97+
>>> client = Client.from_env()
98+
>>> result = client.smartscraper(
99+
... website_url="https://example.com",
100+
... user_prompt="Extract all products"
101+
... )
102+
"""
53103
@classmethod
54104
def from_env(
55105
cls,
@@ -173,7 +223,25 @@ def __init__(
173223
logger.info("✅ Client initialized successfully")
174224

175225
def _make_request(self, method: str, url: str, **kwargs) -> Any:
176-
"""Make HTTP request with error handling."""
226+
"""
227+
Make HTTP request with error handling and retry logic.
228+
229+
Args:
230+
method: HTTP method (GET, POST, etc.)
231+
url: Full URL for the request
232+
**kwargs: Additional arguments to pass to requests
233+
234+
Returns:
235+
Parsed JSON response data
236+
237+
Raises:
238+
APIError: If the API returns an error response
239+
ConnectionError: If unable to connect to the API
240+
241+
Note:
242+
In mock mode, this method returns deterministic responses without
243+
making actual HTTP requests.
244+
"""
177245
# Short-circuit when mock mode is enabled
178246
if getattr(self, "mock", False):
179247
return self._mock_response(method, url, **kwargs)

scrapegraph-py/scrapegraph_py/config.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
1-
# Configuration and constants
1+
"""
2+
Configuration and constants for the ScrapeGraphAI SDK.
3+
4+
This module contains API configuration settings including the base URL
5+
and default headers used for all API requests.
6+
7+
Attributes:
8+
API_BASE_URL (str): Base URL for the ScrapeGraphAI API endpoints
9+
DEFAULT_HEADERS (dict): Default HTTP headers for API requests
10+
"""
211
API_BASE_URL = "https://api.scrapegraphai.com/v1"
312
DEFAULT_HEADERS = {
413
"accept": "application/json",

scrapegraph-py/scrapegraph_py/exceptions.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,28 @@
1+
"""
2+
Custom exceptions for the ScrapeGraphAI SDK.
3+
4+
This module defines custom exception classes used throughout the SDK
5+
for handling API errors and other exceptional conditions.
6+
"""
7+
8+
19
class APIError(Exception):
2-
"""Base exception for API errors."""
10+
"""
11+
Exception raised for API errors.
12+
13+
This exception is raised when the API returns an error response,
14+
providing both the error message and HTTP status code for debugging.
15+
16+
Attributes:
17+
message (str): The error message from the API
18+
status_code (Optional[int]): HTTP status code of the error response, or None if no status code was provided
19+
20+
Example:
21+
>>> try:
22+
... client.smartscraper(website_url="invalid", user_prompt="Extract data")
23+
... except APIError as e:
24+
... print(f"API error {e.status_code}: {e.message}")
25+
"""
326

427
def __init__(self, message: str, status_code: int = None):
528
self.status_code = status_code

0 commit comments

Comments
 (0)