1313import json
1414import os
1515import time
16- from typing import Dict , Any
16+ from typing import Dict , Any , List , Optional
1717
1818from dotenv import load_dotenv
1919
20+ from pydantic import BaseModel , EmailStr , HttpUrl
2021from scrapegraph_py import Client
2122
2223# Load environment variables from .env file
2324load_dotenv ()
2425
26+ # Pydantic models for schema
# Social profile links for the company. Every link is optional.
# Comments are used instead of a docstring because Pydantic copies a class
# docstring into the generated JSON schema as its "description".
class SocialLinks(BaseModel):
    # Explicit `= None` defaults keep these fields optional on every Pydantic
    # version; without a default, Pydantic v2 treats an Optional[...] field as
    # required-but-nullable. This also matches the optional fields in Company.
    github: Optional[HttpUrl] = None
    linkedin: Optional[HttpUrl] = None
    twitter: Optional[HttpUrl] = None
31+
# Company section of the extracted website content.
class Company(BaseModel):
    # Required fields.
    name: str
    description: str

    # Optional fields; each defaults to None when absent.
    features: Optional[List[str]] = None
    contact_email: Optional[EmailStr] = None
    social_links: Optional[SocialLinks] = None
38+
# One entry in the list of services.
class Service(BaseModel):
    # Required fields.
    service_name: str
    description: str

    # Optional list of feature strings; defaults to None when absent.
    features: Optional[List[str]] = None
43+
# Legal section; both fields are required strings.
class Legal(BaseModel):
    privacy_policy: str
    terms_of_service: str
47+
# Top-level model combining the company, services and legal sections.
# All three sections are required.
class WebsiteContent(BaseModel):
    company: Company
    services: List[Service]
    legal: Legal
2552
2653def main ():
2754 if not os .getenv ("SGAI_API_KEY" ):
@@ -31,53 +58,7 @@ def main():
3158 return
3259
3360 # Example schema (from your curl command)
34- schema : Dict [str , Any ] = {
35- "$schema" : "http://json-schema.org/draft-07/schema#" ,
36- "title" : "ScrapeGraphAI Website Content" ,
37- "type" : "object" ,
38- "properties" : {
39- "company" : {
40- "type" : "object" ,
41- "properties" : {
42- "name" : {"type" : "string" },
43- "description" : {"type" : "string" },
44- "features" : {"type" : "array" , "items" : {"type" : "string" }},
45- "contact_email" : {"type" : "string" , "format" : "email" },
46- "social_links" : {
47- "type" : "object" ,
48- "properties" : {
49- "github" : {"type" : "string" , "format" : "uri" },
50- "linkedin" : {"type" : "string" , "format" : "uri" },
51- "twitter" : {"type" : "string" , "format" : "uri" },
52- },
53- "additionalProperties" : False ,
54- },
55- },
56- "required" : ["name" , "description" ],
57- },
58- "services" : {
59- "type" : "array" ,
60- "items" : {
61- "type" : "object" ,
62- "properties" : {
63- "service_name" : {"type" : "string" },
64- "description" : {"type" : "string" },
65- "features" : {"type" : "array" , "items" : {"type" : "string" }},
66- },
67- "required" : ["service_name" , "description" ],
68- },
69- },
70- "legal" : {
71- "type" : "object" ,
72- "properties" : {
73- "privacy_policy" : {"type" : "string" },
74- "terms_of_service" : {"type" : "string" },
75- },
76- "required" : ["privacy_policy" , "terms_of_service" ],
77- },
78- },
79- "required" : ["company" , "services" , "legal" ],
80- }
61+ schema = WebsiteContent .schema ()
8162
8263 url = "https://scrapegraphai.com/"
8364 prompt = (
0 commit comments