
Commit 6b384af

Merge pull request #41 from ScrapeGraphAI/update-search

feat: add search number example

2 parents 7fef9f1 + 4e93394

File tree

11 files changed: +505 -23 lines
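The functional change running through these examples is a new `numResults` argument to `searchScraper`, which controls how many websites a search scrapes (3-20) and, with it, the credit cost. As a minimal sketch of the updated call, inferred from the call sites in the diffs below (the positional parameter order is an assumption based on those call sites, not the package's documented signature):

import { searchScraper } from 'scrapegraph-js';
import 'dotenv/config';

const apiKey = process.env.SGAI_APIKEY;
const prompt = 'What are the latest trends in machine learning?'; // example prompt from the diff below
const numResults = 5; // 3-20 websites; credit cost scales with this value

// numResults is passed positionally, as in the updated examples
const response = await searchScraper(apiKey, prompt, numResults);
console.log(response.result);         // answer text
console.log(response.reference_urls); // source URLs, as read in the examples below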

scrapegraph-js/examples/schema_searchScraper_example.js

Lines changed: 28 additions & 3 deletions

@@ -1,3 +1,12 @@
+/**
+ * Schema-based SearchScraper Example
+ *
+ * This example demonstrates both schema-based output and configurable website limits:
+ * - Default: 3 websites (30 credits)
+ * - Enhanced: 5 websites (50 credits) - provides more comprehensive data for schema
+ * - Maximum: 20 websites (200 credits) - for highly detailed schema population
+ */
+
 import { searchScraper } from 'scrapegraph-js';
 import { z } from 'zod';
 import 'dotenv/config';
@@ -11,9 +20,25 @@ const schema = z.object({
   major_features: z.array(z.string()),
 });
 
+// Configure number of websites for better schema population
+const numResults = 5; // Enhanced search for better schema data (50 credits)
+
 try {
-  const response = await searchScraper(apiKey, prompt, schema);
-  console.log(response.result);
+  console.log(`🔍 Searching ${numResults} websites with custom schema`);
+  console.log(`💳 Credits required: ${numResults <= 3 ? 30 : 30 + (numResults - 3) * 10}`);
+  console.log('-'.repeat(60));
+
+  const response = await searchScraper(apiKey, prompt, numResults, schema);
+
+  console.log('✅ Schema-based search completed successfully!');
+  console.log('\n📋 STRUCTURED RESULT:');
+  console.log(JSON.stringify(response.result, null, 2));
+
+  console.log('\n🔗 Reference URLs:');
+  response.reference_urls?.forEach((url, index) => {
+    console.log(`${index + 1}. ${url}`);
+  });
+
 } catch (error) {
-  console.error(error);
+  console.error('❌ Error:', error.message);
 }
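The credit arithmetic used in both examples (30 base credits covering the first 3 websites, then 10 credits per additional website, capped at 20 websites) condenses to a one-liner; this sketch is equivalent to the `calculateCredits` helper in the new example file below:

// credits(n) = 30 + 10 * (clamp(n, 3, 20) - 3)
const credits = (n) => 30 + (Math.max(3, Math.min(20, n)) - 3) * 10;
console.log(credits(3));  // 30 (default)
console.log(credits(5));  // 30 + 2 * 10 = 50
console.log(credits(20)); // 30 + 17 * 10 = 200 (maximum)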
Lines changed: 333 additions & 0 deletions

@@ -0,0 +1,333 @@

/**
 * Enhanced SearchScraper Example
 *
 * This example demonstrates the SearchScraper API with configurable website limits.
 * Issue #144 enhancement allows users to search up to 20 websites (increased from the previous limit of 3)
 * with a dynamic credit pricing system.
 *
 * Key Features:
 * - Configurable website limits (3-20 websites)
 * - Dynamic credit pricing: 30 credits base + 10 credits per additional website
 * - Enhanced research depth and accuracy
 * - Backward compatibility with existing applications
 *
 * Cost Structure:
 * - Base cost: 30 credits for 3 websites (default)
 * - Additional websites: 10 credits each (e.g., 5 websites = 30 + 2*10 = 50 credits)
 * - Maximum websites: 20 (total cost: 30 + 17*10 = 200 credits)
 *
 * Requirements:
 * - Node.js
 * - scrapegraph-js package
 * - dotenv package
 * - A .env file with your SGAI_APIKEY
 *
 * Example .env file:
 * SGAI_APIKEY=your_api_key_here
 */

import { searchScraper } from 'scrapegraph-js';
import 'dotenv/config';

/**
 * Calculate the required credits for a SearchScraper request.
 * @param {number} numWebsites - Number of websites to scrape (3-20)
 * @returns {number} Total credits required
 */
function calculateCredits(numWebsites) {
  // Validate website count
  const validatedCount = Math.max(3, Math.min(20, numWebsites));

  // Calculate credits: 30 base + 10 per extra website
  if (validatedCount <= 3) {
    return 30;
  } else {
    const extraWebsites = validatedCount - 3;
    return 30 + (extraWebsites * 10);
  }
}

/**
 * Query the Enhanced SearchScraper API for search results.
 * @param {string} userPrompt - The search prompt string
 * @param {number} numResults - Number of websites to scrape (3-20). Default is 3.
 * @returns {Promise<Object>} The search results with metadata
 */
async function searchScraperQuery(userPrompt, numResults = 3) {
  const apiKey = process.env.SGAI_APIKEY;

  if (!apiKey) {
    throw new Error('SGAI_APIKEY not found in environment variables. Please create a .env file with: SGAI_APIKEY=your_api_key_here');
  }

  // Validate and calculate credits
  const validatedWebsites = Math.max(3, Math.min(20, numResults));
  const requiredCredits = calculateCredits(validatedWebsites);

  console.log(`🔍 Search Prompt: ${userPrompt}`);
  console.log(`🌐 Requested websites: ${numResults} → Validated: ${validatedWebsites}`);
  console.log(`💳 Required credits: ${requiredCredits}`);
  console.log('-'.repeat(60));

  const startTime = Date.now();

  try {
    const response = await searchScraper(apiKey, userPrompt, numResults);
    const executionTime = (Date.now() - startTime) / 1000;

    console.log(`⏱️ Execution time: ${executionTime.toFixed(2)} seconds`);

    // Extract result data
    const resultData = {
      result: response.result || '',
      references: response.reference_urls || [],
      metadata: {
        request_id: response.request_id,
        num_results: validatedWebsites,
        execution_time: executionTime,
        required_credits: requiredCredits,
      },
    };

    console.log(`✅ Found ${resultData.references.length} reference sources`);
    console.log(`📊 Credits used: ${requiredCredits}`);

    return resultData;

  } catch (error) {
    const executionTime = (Date.now() - startTime) / 1000;
    console.log(`⏱️ Execution time: ${executionTime.toFixed(2)} seconds`);
    console.log(`❌ Error: ${error.message}`);
    throw error;
  }
}

/**
 * Demonstrate the benefits of different website scaling options.
 */
function demonstrateScalingBenefits() {
  console.log('💰 SEARCHSCRAPER CREDIT SCALING');
  console.log('='.repeat(50));

  const scalingExamples = [
    [3, 'Standard Search (Default)'],
    [5, 'Enhanced Search (More Sources)'],
    [10, 'Comprehensive Search (Deep Research)'],
    [15, 'Extensive Search (Maximum Coverage)'],
    [20, 'Ultimate Search (Complete Coverage)'],
  ];

  scalingExamples.forEach(([websites, description]) => {
    const credits = calculateCredits(websites);
    const extraWebsites = Math.max(0, websites - 3);
    const efficiency = websites / credits;

    console.log(`🌐 ${websites.toString().padStart(2)} websites (${description})`);
    console.log(`   💳 ${credits.toString().padStart(3)} credits (base: 30 + ${extraWebsites} × 10)`);
    console.log(`   📊 Efficiency: ${efficiency.toFixed(3)} websites/credit`);
    console.log();
  });
}

/**
 * Run the same query with different website limits to show the benefit.
 */
async function runComparisonExample() {
  const query = 'Latest advancements in artificial intelligence 2024';

  console.log('🔬 COMPARISON: STANDARD vs ENHANCED SEARCH');
  console.log('='.repeat(60));
  console.log(`Query: ${query}`);
  console.log();

  // Test different configurations
  const configurations = [
    { websites: 3, description: 'Standard Search' },
    { websites: 7, description: 'Enhanced Search' },
  ];

  const results = {};

  for (const config of configurations) {
    const { websites, description } = config;

    console.log(`🚀 Running ${description} (${websites} websites)...`);
    try {
      const result = await searchScraperQuery(query, websites);
      results[websites] = result;
      console.log(`✅ ${description} completed successfully`);
      console.log(`   📄 Result length: ${result.result.length} characters`);
      console.log(`   🔗 References: ${result.references.length} sources`);
      console.log();
    } catch (error) {
      console.log(`❌ ${description} failed: ${error.message}`);
      console.log();
    }
  }

  // Show comparison summary
  const resultKeys = Object.keys(results);
  if (resultKeys.length > 1) {
    console.log('📊 COMPARISON SUMMARY');
    console.log('-'.repeat(40));
    resultKeys.forEach(websites => {
      const result = results[websites];
      const metadata = result.metadata;
      console.log(
        `🌐 ${websites} websites: ${result.references.length} sources, ` +
        `${metadata.required_credits} credits, ` +
        `${metadata.execution_time.toFixed(1)}s`
      );
    });
  }
}

/**
 * Run concurrent searches to demonstrate parallel processing
 */
async function runConcurrentExample() {
  console.log('🚀 CONCURRENT REQUESTS EXAMPLE');
  console.log('='.repeat(50));

  // Define multiple queries with different website limits
  const queries = [
    ['JavaScript best practices 2024', 3],
    ['React vs Vue comparison', 5],
    ['Node.js performance optimization', 4],
  ];

  console.log('🔄 Running concurrent searches...');
  const startTime = Date.now();

  try {
    // Create promises for concurrent execution
    const promises = queries.map(([query, numResults]) =>
      searchScraperQuery(query, numResults)
    );

    // Wait for all requests to complete
    const results = await Promise.allSettled(promises);
    const totalTime = (Date.now() - startTime) / 1000;

    console.log(`⏱️ Total concurrent execution time: ${totalTime.toFixed(2)} seconds`);
    console.log();

    const successfulResults = results.filter(r => r.status === 'fulfilled').map(r => r.value);
    const failedResults = results.filter(r => r.status === 'rejected');

    console.log(`✅ Successful requests: ${successfulResults.length}`);
    console.log(`❌ Failed requests: ${failedResults.length}`);

    if (successfulResults.length > 0) {
      const totalCredits = successfulResults.reduce((sum, r) => sum + r.metadata.required_credits, 0);
      const totalSources = successfulResults.reduce((sum, r) => sum + r.references.length, 0);
      console.log(`💳 Total credits used: ${totalCredits}`);
      console.log(`🔗 Total sources gathered: ${totalSources}`);
    }

    if (failedResults.length > 0) {
      console.log('\n❌ Failed requests:');
      failedResults.forEach((result, index) => {
        console.log(`   ${index + 1}. ${result.reason.message}`);
      });
    }

  } catch (error) {
    console.log(`❌ Concurrent execution failed: ${error.message}`);
  }

  console.log();
}

/**
 * Main function demonstrating enhanced SearchScraper features.
 */
async function main() {
  console.log('🚀 ENHANCED SEARCHSCRAPER DEMONSTRATION');
  console.log('🔗 Issue #144: SearchScraper Website Limit Enhancement');
  console.log('='.repeat(70));
  console.log();

  // Check API key
  const apiKey = process.env.SGAI_APIKEY;
  if (!apiKey) {
    console.log('❌ Error: SGAI_APIKEY not found in .env file');
    console.log('Please create a .env file with your API key:');
    console.log('SGAI_APIKEY=your_api_key_here');
    console.log();
    console.log('📖 Showing credit scaling demonstration without API calls...');
    console.log();
    demonstrateScalingBenefits();
    return;
  }

  try {
    // 1. Show credit scaling
    demonstrateScalingBenefits();

    // 2. Run basic example
    console.log('🎯 BASIC EXAMPLE');
    console.log('='.repeat(30));

    const userPrompt = 'What are the latest trends in machine learning?';
    const numResults = 5; // Enhanced search with 5 websites

    try {
      const results = await searchScraperQuery(userPrompt, numResults);

      console.log();
      console.log('📋 RESULTS SUMMARY:');
      console.log(`   🔍 Query: ${userPrompt}`);
      console.log(`   🌐 Websites scraped: ${results.metadata.num_results}`);
      console.log(`   💳 Credits used: ${results.metadata.required_credits}`);
      console.log(`   ⏱️ Execution time: ${results.metadata.execution_time.toFixed(1)}s`);
      console.log(`   🔗 Reference sources: ${results.references.length}`);
      console.log();

      // Show a portion of the result
      const resultText = results.result;
      if (resultText.length > 300) {
        console.log(`📄 Result preview: ${resultText.substring(0, 300)}...`);
      } else {
        console.log(`📄 Result: ${resultText}`);
      }
      console.log();

      // Show references
      console.log('🔗 REFERENCE SOURCES:');
      results.references.slice(0, 5).forEach((ref, i) => {
        console.log(`   ${i + 1}. ${ref}`);
      });
      if (results.references.length > 5) {
        console.log(`   ... and ${results.references.length - 5} more sources`);
      }
      console.log();

    } catch (error) {
      console.log(`❌ Error: ${error.message}`);
      console.log();
    }

    // 3. Run comparison example
    await runComparisonExample();

    // 4. Run concurrent example
    await runConcurrentExample();

    console.log('✨ Enhanced SearchScraper demonstration completed!');
    console.log();
    console.log('🎯 Key Enhancement Benefits:');
    console.log('   • Configurable website limits (3-20)');
    console.log('   • Transparent credit pricing');
    console.log('   • Better research depth and accuracy');
    console.log('   • Maintained backward compatibility');
    console.log('   • Enhanced data validation through multiple sources');
    console.log('   • Concurrent request support for better performance');

  } catch (error) {
    console.log(`❌ Unexpected error: ${error.message}`);
  }
}

// Run the demonstration
main().catch(console.error);
