Skip to content

Commit 17f5c03

Browse files
committed
change scrapeconfig param types to enums
1 parent 628e823 commit 17f5c03

File tree

8 files changed

+78
-32
lines changed

8 files changed

+78
-32
lines changed

__tests__/scrapeconfig.test.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { ScrapeConfig } from '../src/scrapeconfig.js';
1+
import { ScrapeConfig, ScreenshotFlags, Format } from '../src/scrapeconfig.js';
22
import { HttpMethod } from '../src/types.js';
33
import { ScrapeConfigError } from '../src/errors.js';
44
import { describe, it, expect } from '@jest/globals';
@@ -199,11 +199,11 @@ describe('url param generation', () => {
199199
url: 'http://httpbin.dev/get',
200200
screenshots: { everything: 'fullpage' },
201201
screenshot_flags: [
202-
"load_images",
203-
"dark_mode",
204-
"block_banners",
205-
"high_quality",
206-
"print_media_format"
202+
ScreenshotFlags.LOAD_IMAGES, // Enable image rendering with the request, adds extra usage for the bandwidth consumed
203+
ScreenshotFlags.DARK_MODE, // Enable dark mode display
204+
ScreenshotFlags.BLOCK_BANNERS, // Block cookies banners and overlay that cover the screen
205+
ScreenshotFlags.HIGH_QUALITY, // No compression on the output image
206+
ScreenshotFlags.PRINT_MEDIA_FORMAT, // Render the page in the print mode
207207
],
208208
render_js: true,
209209
});
@@ -262,7 +262,7 @@ describe('url param generation', () => {
262262
it('format set', () => {
263263
const config = new ScrapeConfig({
264264
url: 'http://httpbin.dev/get',
265-
format: "markdown",
265+
format: Format.MARKDOWN,
266266
});
267267
expect(config.toApiParams({ key: '1234' })).toEqual({
268268
key: '1234',
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
This example shows how to capture page screenshots with images and additional configuration in scrapfly
33
*/
4-
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
4+
import { ScrapflyClient, ScrapeConfig, ScrapeFormat } from 'scrapfly-sdk';
55

66
const key = 'YOUR SCRAPFLY KEY';
77
const client = new ScrapflyClient({ key });
@@ -10,7 +10,7 @@ const result = await client.scrape(
1010
url: 'https://web-scraping.dev/products/',
1111
// scrape the page data as markdown format supported by LLMs.
1212
// None=raw(unchanged), other supported formats are: json, text, clean_html
13-
format: "markdown"
13+
format: ScrapeFormat.MARKDOWN
1414
}),
1515
);
1616
console.log(result.result.content);

examples/screenshot/basic-screenshot-wtih-scrape-api.js

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,12 @@ console.log(result.result.screenshots);
4242
*/
4343

4444
// To save screenshot to file you can download the screenshot from the result urls
45-
import axios from 'axios';
4645
import fs from 'fs';
4746
for (let [name, screenshot] of Object.entries(result.result.screenshots)) {
48-
let response = await axios.get(screenshot.url, {
49-
// note: don't forget to add your API key parameter:
50-
params: { key: key },
51-
// this indicates that response is binary data:
52-
responseType: 'arraybuffer',
53-
});
54-
// write to screenshot data to a file in currenct directory:
55-
fs.writeFileSync(`example-screenshot-${name}.${screenshot.extension}`, response.data);
47+
const url = new URL(screenshot.url);
48+
// note: don't forget to add your API key parameter:
49+
url.searchParams.append('key', key);
50+
const response = await fetch(url.href);
51+
const content = Buffer.from(await response.arrayBuffer());
52+
fs.writeFileSync(`screenshots/example-screenshot-${name}.${screenshot.extension}`, content, 'binary');
5653
}
Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
/*
22
This example shows how to capture page screenshots with images and additional configuration in scrapfly
33
*/
4-
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
4+
import { ScrapflyClient, ScrapeConfig, ScreenshotFlags } from 'scrapfly-sdk';
55

66
const key = 'YOUR SCRAPFLY KEY';
77
const client = new ScrapflyClient({ key });
88
const result = await client.scrape(
99
new ScrapeConfig({
10-
url: 'https://web-scraping.dev/products/',
10+
url: 'https://web-scraping.dev/product/2',
1111
// enable headless browsers for screenshots
1212
render_js: true,
1313
// optional: you can wait for page to load before capturing
@@ -16,12 +16,22 @@ const result = await client.scrape(
1616
reviews: '#reviews',
1717
},
1818
screenshot_flags: [
19-
"load_images", // Enable image rendering with the request, adds extra usage for the bandwidth consumed
20-
"dark_mode", // Enable dark mode display
21-
"block_banners", // Block cookies banners and overlay that cover the screen
22-
"high_quality", // No compression on the output image
23-
"print_media_format" // Render the page in the print mode
24-
]
19+
ScreenshotFlags.LOAD_IMAGES, // Enable image rendering with the request, adds extra usage for the bandwidth consumed
20+
ScreenshotFlags.BLOCK_BANNERS, // Block cookies banners and overlay that cover the screen
21+
ScreenshotFlags.HIGH_QUALITY, // No compression on the output image
22+
ScreenshotFlags.LOAD_IMAGES, // Render the page in the print mode
23+
],
2524
}),
2625
);
2726
console.log(result.result.screenshots);
27+
28+
// To save screenshot to file you can download the screenshot from the result urls
29+
import fs from 'fs';
30+
for (let [name, screenshot] of Object.entries(result.result.screenshots)) {
31+
const url = new URL(screenshot.url);
32+
// note: don't forget to add your API key parameter:
33+
url.searchParams.append('key', key);
34+
const response = await fetch(url.href);
35+
const content = Buffer.from(await response.arrayBuffer());
36+
fs.writeFileSync(`screenshots/example-screenshot-${name}.${screenshot.extension}`, content, 'binary');
37+
}
498 KB
Loading
66.9 KB
Loading

src/main.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
export { ScrapflyClient } from './client.js';
2-
export { ScrapeConfig } from './scrapeconfig.js';
2+
export { ScrapeConfig, ScreenshotFlags, Format as ScrapeFormat } from './scrapeconfig.js';
33
export { ScreenshotConfig, Format as ScreenshotFormat, Options as ScreenshotOptions } from './screenshotconfig.js';
44
export * as errors from './errors.js';
55
export {

src/scrapeconfig.ts

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,49 @@ import { log } from './logger.js';
33
import { Rec, HttpMethod } from './types.js';
44
import { ScrapeConfigError } from './errors.js';
55

6-
type ScreenshotFlags = "load_images" | "dark_mode" | "block_banners" | "high_quality" | "print_media_format";
7-
type Format = "raw" | "json" | "text" | "markdown" | "clean_html";
6+
export enum ScreenshotFlags {
7+
/**
8+
Options to customize the screenshot behavior
9+
Attributes:
10+
LOAD_IMAGES: Enable image rendering with the request, add extra usage for the bandwidth consumed.
11+
DARK_MODE: Enable dark mode display.
12+
BLOCK_BANNERS: Block cookies banners and overlay that cover the screen.
13+
PRINT_MEDIA_FORMAT: Render the page in the print mode.
HIGH_QUALITY: No compression on the output image.
14+
*/
15+
LOAD_IMAGES = 'load_images',
16+
DARK_MODE = 'dark_mode',
17+
BLOCK_BANNERS = 'block_banners',
18+
PRINT_MEDIA_FORMAT = 'print_media_format',
19+
HIGH_QUALITY = 'high_quality',
20+
}
21+
22+
export enum Format {
23+
/**
24+
Format of the scraped content.
25+
Attributes:
26+
JSON: JSON format.
27+
TEXT: Text format.
28+
MARKDOWN: Markdown format.
29+
CLEAN_HTML: Clean HTML format.
30+
*/
31+
JSON = 'json',
32+
TEXT = 'text',
33+
MARKDOWN = 'markdown',
34+
CLEAN_HTML = 'clean_html',
35+
}
836

937
export class ScrapeConfig {
1038
static PUBLIC_DATACENTER_POOL = 'public_datacenter_pool';
1139
static PUBLIC_RESIDENTIAL_POOL = 'public_residential_pool';
12-
40+
1341
url: string;
1442
retry = true;
1543
method: HttpMethod = 'GET';
1644
country?: string = null;
1745
render_js = false;
1846
cache = false;
1947
cache_clear = false;
20-
cost_budget?: number = null
48+
cost_budget?: number = null;
2149
ssl = false;
2250
dns = false;
2351
asp = false;
@@ -85,6 +113,17 @@ export class ScrapeConfig {
85113
lang?: string[];
86114
auto_scroll?: boolean;
87115
}) {
116+
if (options.format && !Object.values(Format).includes(options.format)) {
117+
throw new ScrapeConfigError(`Invalid format param value: ${options.format}`);
118+
}
119+
this.format = options.format ?? this.format;
120+
if (options.screenshot_flags) {
121+
options.screenshot_flags.forEach((flag) => {
122+
if (!Object.values(ScreenshotFlags).includes(flag)) {
123+
throw new ScrapeConfigError(`Invalid screenshot_flags param value: ${flag}`);
124+
}
125+
});
126+
}
88127
this.url = options.url;
89128
this.retry = options.retry ?? this.retry;
90129
this.method = options.method ?? this.method;
@@ -209,7 +248,7 @@ export class ScrapeConfig {
209248
} else {
210249
if (this.screenshot_flags) {
211250
log.warn('Params "screenshot_flags" is ignored. Works only if screenshots is enabled');
212-
}
251+
}
213252
}
214253
if (this.auto_scroll !== null) {
215254
params.auto_scroll = this.auto_scroll;

0 commit comments

Comments
 (0)