Skip to content

Commit 17f5c03

Browse files
committed
change scrapeconfig param types to enums
1 parent 628e823 commit 17f5c03

File tree

8 files changed

+78
-32
lines changed

8 files changed

+78
-32
lines changed

__tests__/scrapeconfig.test.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { ScrapeConfig } from '../src/scrapeconfig.js';
1+
import { ScrapeConfig, ScreenshotFlags, Format } from '../src/scrapeconfig.js';
22
import { HttpMethod } from '../src/types.js';
33
import { ScrapeConfigError } from '../src/errors.js';
44
import { describe, it, expect } from '@jest/globals';
@@ -199,11 +199,11 @@ describe('url param generation', () => {
199199
url: 'http://httpbin.dev/get',
200200
screenshots: { everything: 'fullpage' },
201201
screenshot_flags: [
202-
"load_images",
203-
"dark_mode",
204-
"block_banners",
205-
"high_quality",
206-
"print_media_format"
202+
ScreenshotFlags.LOAD_IMAGES, // Enable image rendering with the request, adds extra usage for the bandwidth consumed
203+
ScreenshotFlags.DARK_MODE, // Enable dark mode display
204+
ScreenshotFlags.BLOCK_BANNERS, // Block cookies banners and overlay that cover the screen
205+
ScreenshotFlags.HIGH_QUALITY, // No compression on the output image
206+
ScreenshotFlags.PRINT_MEDIA_FORMAT, // Render the page in the print mode
207207
],
208208
render_js: true,
209209
});
@@ -262,7 +262,7 @@ describe('url param generation', () => {
262262
it('format set', () => {
263263
const config = new ScrapeConfig({
264264
url: 'http://httpbin.dev/get',
265-
format: "markdown",
265+
format: Format.MARKDOWN,
266266
});
267267
expect(config.toApiParams({ key: '1234' })).toEqual({
268268
key: '1234',
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
This example shows how to capture page screenshots with images and additional configuration in scrapfly
33
*/
4-
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
4+
import { ScrapflyClient, ScrapeConfig, ScrapeFormat } from 'scrapfly-sdk';
55

66
const key = 'YOUR SCRAPFLY KEY';
77
const client = new ScrapflyClient({ key });
@@ -10,7 +10,7 @@ const result = await client.scrape(
1010
url: 'https://web-scraping.dev/products/',
1111
// scrape the page data as markdown format supported by LLMs.
1212
// None=raw(unchanged), other supported formats are: json, text, clean_html
13-
format: "markdown"
13+
format: ScrapeFormat.MARKDOWN
1414
}),
1515
);
1616
console.log(result.result.content);

examples/screenshot/basic-screenshot-wtih-scrape-api.js

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,12 @@ console.log(result.result.screenshots);
4242
*/
4343

4444
// To save screenshot to file you can download the screenshot from the result urls
45-
import axios from 'axios';
4645
import fs from 'fs';
4746
for (let [name, screenshot] of Object.entries(result.result.screenshots)) {
48-
let response = await axios.get(screenshot.url, {
49-
// note: don't forget to add your API key parameter:
50-
params: { key: key },
51-
// this indicates that response is binary data:
52-
responseType: 'arraybuffer',
53-
});
54-
// write to screenshot data to a file in currenct directory:
55-
fs.writeFileSync(`example-screenshot-${name}.${screenshot.extension}`, response.data);
47+
const url = new URL(screenshot.url);
48+
// note: don't forget to add your API key parameter:
49+
url.searchParams.append('key', key);
50+
const response = await fetch(url.href);
51+
const content = Buffer.from(await response.arrayBuffer());
52+
fs.writeFileSync(`screenshots/example-screenshot-${name}.${screenshot.extension}`, content, 'binary');
5653
}
Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
/*
22
This example shows how to capture page screenshots with images and additional configuration in scrapfly
33
*/
4-
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
4+
import { ScrapflyClient, ScrapeConfig, ScreenshotFlags } from 'scrapfly-sdk';
55

66
const key = 'YOUR SCRAPFLY KEY';
77
const client = new ScrapflyClient({ key });
88
const result = await client.scrape(
99
new ScrapeConfig({
10-
url: 'https://web-scraping.dev/products/',
10+
url: 'https://web-scraping.dev/product/2',
1111
// enable headless browsers for screenshots
1212
render_js: true,
1313
// optional: you can wait for page to load before capturing
@@ -16,12 +16,22 @@ const result = await client.scrape(
1616
reviews: '#reviews',
1717
},
1818
screenshot_flags: [
19-
"load_images", // Enable image rendering with the request, adds extra usage for the bandwidth consumed
20-
"dark_mode", // Enable dark mode display
21-
"block_banners", // Block cookies banners and overlay that cover the screen
22-
"high_quality", // No compression on the output image
23-
"print_media_format" // Render the page in the print mode
24-
]
19+
ScreenshotFlags.LOAD_IMAGES, // Enable image rendering with the request, adds extra usage for the bandwidth consumed
20+
ScreenshotFlags.BLOCK_BANNERS, // Block cookies banners and overlay that cover the screen
21+
ScreenshotFlags.HIGH_QUALITY, // No compression on the output image
22+
ScreenshotFlags.LOAD_IMAGES, // Render the page in the print mode
23+
],
2524
}),
2625
);
2726
console.log(result.result.screenshots);
27+
28+
// To save screenshot to file you can download the screenshot from the result urls
29+
import fs from 'fs';
30+
for (let [name, screenshot] of Object.entries(result.result.screenshots)) {
31+
const url = new URL(screenshot.url);
32+
// note: don't forget to add your API key parameter:
33+
url.searchParams.append('key', key);
34+
const response = await fetch(url.href);
35+
const content = Buffer.from(await response.arrayBuffer());
36+
fs.writeFileSync(`screenshots/example-screenshot-${name}.${screenshot.extension}`, content, 'binary');
37+
}
498 KB
Loading
66.9 KB
Loading

src/main.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
export { ScrapflyClient } from './client.js';
2-
export { ScrapeConfig } from './scrapeconfig.js';
2+
export { ScrapeConfig, ScreenshotFlags, Format as ScrapeFormat } from './scrapeconfig.js';
33
export { ScreenshotConfig, Format as ScreenshotFormat, Options as ScreenshotOptions } from './screenshotconfig.js';
44
export * as errors from './errors.js';
55
export {

src/scrapeconfig.ts

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,49 @@ import { log } from './logger.js';
33
import { Rec, HttpMethod } from './types.js';
44
import { ScrapeConfigError } from './errors.js';
55

6-
type ScreenshotFlags = "load_images" | "dark_mode" | "block_banners" | "high_quality" | "print_media_format";
7-
type Format = "raw" | "json" | "text" | "markdown" | "clean_html";
6+
export enum ScreenshotFlags {
7+
/**
8+
Options to customize the screenshot behavior
9+
Attributes:
10+
LOAD_IMAGES: Enable image rendering with the request, add extra usage for the bandwidth consumed.
11+
DARK_MODE: Enable dark mode display.
12+
BLOCK_BANNERS: Block cookies banners and overlay that cover the screen.
13+
PRINT_MEDIA_FORMAT: Render the page in the print mode.
HIGH_QUALITY: No compression on the output image.
14+
*/
15+
LOAD_IMAGES = 'load_images',
16+
DARK_MODE = 'dark_mode',
17+
BLOCK_BANNERS = 'block_banners',
18+
PRINT_MEDIA_FORMAT = 'print_media_format',
19+
HIGH_QUALITY = 'high_quality',
20+
}
21+
22+
export enum Format {
23+
/**
24+
Format of the scraped content.
25+
Attributes:
26+
JSON: JSON format.
27+
TEXT: Text format.
28+
MARKDOWN: Markdown format.
29+
CLEAN_HTML: Clean HTML format.
30+
*/
31+
JSON = 'json',
32+
TEXT = 'text',
33+
MARKDOWN = 'markdown',
34+
CLEAN_HTML = 'clean_html',
35+
}
836

937
export class ScrapeConfig {
1038
static PUBLIC_DATACENTER_POOL = 'public_datacenter_pool';
1139
static PUBLIC_RESIDENTIAL_POOL = 'public_residential_pool';
12-
40+
1341
url: string;
1442
retry = true;
1543
method: HttpMethod = 'GET';
1644
country?: string = null;
1745
render_js = false;
1846
cache = false;
1947
cache_clear = false;
20-
cost_budget?: number = null
48+
cost_budget?: number = null;
2149
ssl = false;
2250
dns = false;
2351
asp = false;
@@ -85,6 +113,17 @@ export class ScrapeConfig {
85113
lang?: string[];
86114
auto_scroll?: boolean;
87115
}) {
116+
if (options.format && !Object.values(Format).includes(options.format)) {
117+
throw new ScrapeConfigError(`Invalid format param value: ${options.format}`);
118+
}
119+
this.format = options.format ?? this.format;
120+
if (options.screenshot_flags) {
121+
options.screenshot_flags.forEach((flag) => {
122+
if (!Object.values(ScreenshotFlags).includes(flag)) {
123+
throw new ScrapeConfigError(`Invalid screenshot_flags param value: ${flag}`);
124+
}
125+
});
126+
}
88127
this.url = options.url;
89128
this.retry = options.retry ?? this.retry;
90129
this.method = options.method ?? this.method;
@@ -209,7 +248,7 @@ export class ScrapeConfig {
209248
} else {
210249
if (this.screenshot_flags) {
211250
log.warn('Params "screenshot_flags" is ignored. Works only if screenshots is enabled');
212-
}
251+
}
213252
}
214253
if (this.auto_scroll !== null) {
215254
params.auto_scroll = this.auto_scroll;

0 commit comments

Comments
 (0)