Skip to content

Commit 818a369

Browse files
committed
add webhook param support with the screenshot and extraction apis
1 parent 5bf08a1 commit 818a369

File tree

8 files changed

+113
-20
lines changed

8 files changed

+113
-20
lines changed

__tests__/client.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ it('succeeds', async () => {
510510

511511
const result = await client.screenshot(new ScreenshotConfig({ url: url }));
512512
expect(result).toBeDefined();
513-
expect(result.metadata.format).toBe('png');
513+
expect(result.metadata.extension_name).toBe('png');
514514
expect(result.metadata.upstream_url).toEqual(url);
515515
expect(result.metadata.upstream_status_code).toBe(200);
516516
expect(spy).toHaveBeenCalledTimes(1);
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
This example shows how to use the webhook feature with Scrapfly's extraction API
3+
*/
4+
import { ScrapflyClient, ScrapeConfig, ExtractionConfig } from 'scrapfly-sdk';
5+
6+
const key = 'YOUR SCRAPFLY KEY';
7+
const client = new ScrapflyClient({ key });
8+
9+
// First, scrape the web page to retrieve its HTML
10+
const scrapeResult = await client.scrape(
11+
new ScrapeConfig({
12+
url: 'https://web-scraping.dev/products',
13+
render_js: true,
14+
}),
15+
);
16+
17+
// raw HTML content
18+
const html = scrapeResult.result.content;
19+
20+
// Second, pass the HTML and an extraction prompt
21+
// In this example, we'll ask a question about the data
22+
const extractionResult = await client.extract(
23+
new ExtractionConfig({
24+
body: html, // pass the HTML content
25+
content_type: 'text/html', // data content type
26+
charset: 'utf-8', // passed content charset, use `auto` if you aren't sure
27+
extraction_prompt: 'what is the flavor of the dark energy potion?', // LLM extraction prompt
28+
webhook: 'my-webhook'
29+
}),
30+
);
31+
32+
// raw result: with `webhook` set, the API responds with a queue acknowledgement like the sample below
33+
console.log(extractionResult.result)
34+
`
35+
{
36+
job_uuid: '7a3aa96d-fb0e-4c45-9b01-7c42f295dcac',
37+
success: true,
38+
webhook_name: 'my-webhook',
39+
webhook_queue_limit: 10000,
40+
webhook_queued_element: 7,
41+
webhook_uuid: 'd7131802-1eba-4cc4-a6fd-5da6c8cf1f35'
42+
}
43+
`
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
This example shows how to use the webhook feature with Scrapfly's screenshot API
3+
*/
4+
import { ScrapflyClient, ScreenshotConfig } from 'scrapfly-sdk';
5+
6+
const key = 'YOUR SCRAPFLY KEY';
7+
const client = new ScrapflyClient({ key });
8+
9+
const screenshotResult = await client.screenshot(
10+
new ScreenshotConfig({
11+
url: 'https://web-scraping.dev/products',
12+
webhook: 'my-webhook'
13+
}),
14+
);
15+
16+
// raw result: with `webhook` set, the API responds with a queue acknowledgement like the sample below
17+
console.log(screenshotResult.result)
18+
`
19+
{
20+
job_uuid: 'a0e6f3e8-be35-438a-942a-be77aa545d30',
21+
success: true,
22+
webhook_name: 'my-webhook',
23+
webhook_queue_limit: 10000,
24+
webhook_queued_element: 7,
25+
webhook_uuid: 'cdf37252-fea7-4267-a568-aa0e5964ee21'
26+
}
27+
`

examples/screenshot/screenshot-with-screenshot-api.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ const result = await client.screenshot(
2424
}),
2525
);
2626

27-
// screenshot metadata (format, upstream_status_code, upstream_url)
27+
// screenshot metadata (extension_name, upstream_status_code, upstream_url)
2828
const metadata = result.metadata;
2929

3030
// screenshot binary

src/client.ts

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -272,14 +272,14 @@ export class ScrapflyClient {
272272
throw new Error('Screenshot binary does not exist');
273273
}
274274

275-
const format = result.metadata.format;
275+
const extension_name = result.metadata.extension_name;
276276
let file_path;
277277

278278
if (savePath) {
279279
fs.mkdirSync(savePath, { recursive: true });
280-
file_path = path.join(savePath, `${name}.${format}`);
280+
file_path = path.join(savePath, `${name}.${extension_name}`);
281281
} else {
282-
file_path = `${name}.${format}`;
282+
file_path = `${name}.${extension_name}`;
283283
}
284284

285285
const content = Buffer.from(result.image);
@@ -292,13 +292,11 @@ export class ScrapflyClient {
292292
async handleScreenshotResponse(response: Response): Promise<ScreenshotResult> {
293293
if (response.headers.get('content-encoding') != 'gzip') {
294294
const data = (await response.json()) as any;
295-
if (response.headers.get('content-encoding') !== 'gzip') {
296-
if (data.http_code == 401 || response.status == 401) {
297-
throw new errors.BadApiKeyError(JSON.stringify(data));
298-
}
299-
if ('error_id' in data) {
300-
throw new errors.ScreenshotApiError(JSON.stringify(data));
301-
}
295+
if (data.http_code == 401 || response.status == 401) {
296+
throw new errors.BadApiKeyError(JSON.stringify(data));
297+
}
298+
if ('error_id' in data) {
299+
throw new errors.ScreenshotApiError(JSON.stringify(data));
302300
}
303301
}
304302
if (!response.ok) {
@@ -350,7 +348,7 @@ export class ScrapflyClient {
350348
}
351349
if (!response.ok) {
352350
throw new errors.ApiHttpClientError(JSON.stringify(await response.json()));
353-
}
351+
}
354352
const result = new ExtractionResult(data);
355353
return result;
356354
}
@@ -381,7 +379,7 @@ export class ScrapflyClient {
381379
log.error('error', e);
382380
throw e;
383381
}
384-
const result = await this.handleExtractionResponse(response)
382+
const result = await this.handleExtractionResponse(response);
385383
return result;
386384
}
387385
}

src/extractionconfig.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export class ExtractionConfig {
1010
epehemeral_template?: object; // ephemerally declared JSON template
1111
extraction_prompt?: string = null;
1212
extraction_model?: string = null;
13+
webhook?: string = null;
1314

1415
constructor(options: {
1516
body: string;
@@ -20,6 +21,7 @@ export class ExtractionConfig {
2021
epehemeral_template?: object; // ephemerally declared JSON template
2122
extraction_prompt?: string;
2223
extraction_model?: string;
24+
webhook?: string;
2325
}) {
2426
this.body = options.body;
2527
this.content_type = options.content_type;
@@ -29,6 +31,7 @@ export class ExtractionConfig {
2931
this.epehemeral_template = options.epehemeral_template;
3032
this.extraction_prompt = options.extraction_prompt;
3133
this.extraction_model = options.extraction_model;
34+
this.webhook = options.webhook;
3235
}
3336

3437
toApiParams(options: { key: string }): Record<string, any> {
@@ -65,6 +68,11 @@ export class ExtractionConfig {
6568
if (this.extraction_model) {
6669
params.extraction_model = this.extraction_model;
6770
}
71+
72+
if (this.webhook) {
73+
params.webhook_name = this.webhook;
74+
}
75+
6876
return params;
6977
}
7078
}

src/result.ts

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -267,41 +267,51 @@ export class AccountData {
267267
}
268268

269269
export type ScreenshotMetadata = {
270-
format: string;
270+
extension_name: string;
271271
upstream_status_code: number;
272272
upstream_url: string;
273273
};
274274

275275
export class ScreenshotResult {
276276
image: ArrayBuffer;
277277
metadata: ScreenshotMetadata;
278+
result: object;
278279

279280
constructor(response: Response, data: ArrayBuffer) {
280281
this.image = data;
281282
this.metadata = this.defineMetadata(response);
283+
this.result = this.returnRaw(response, data); // raw result
282284
}
283285

284286
private defineMetadata(response: Response): ScreenshotMetadata {
285287
const contentType = response.headers.get('content-type');
286-
let format;
288+
let extension_name;
287289
if (contentType) {
288-
format = contentType.split('/')[1].split(';')[0];
289-
format = format === 'jpeg' ? 'jpg' : format;
290+
extension_name = contentType.split('/')[1].split(';')[0];
290291
}
291292
return {
292-
format: format,
293+
extension_name: extension_name,
293294
upstream_status_code: parseInt(response.headers.get('X-Scrapfly-Upstream-Http-Code'), 10),
294295
upstream_url: response.headers.get('X-Scrapfly-Upstream-Url'),
295296
};
296297
}
298+
299+
private returnRaw(response: Response, data: ArrayBuffer): Promise<any> {
300+
if (response.headers.get('content-encoding') == 'gzip') {
301+
return null
302+
}
303+
return JSON.parse(new TextDecoder().decode(data));
304+
}
297305
}
298306

299307
export class ExtractionResult {
300308
data: string;
301309
content_type: string;
310+
result: object;
302311

303312
constructor(response: { data: string; content_type: string }) {
304313
this.data = response.data;
305314
this.content_type = response.content_type;
315+
this.result = response; // raw data
306316
}
307317
}

src/screenshotconfig.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ export class ScreenshotConfig {
4747
cache?: boolean = null;
4848
cache_ttl?: boolean = null;
4949
cache_clear?: boolean = null;
50+
webhook?: string = null;
5051

5152
constructor(options: {
5253
url: string;
@@ -63,14 +64,15 @@ export class ScreenshotConfig {
6364
cache?: boolean;
6465
cache_ttl?: boolean;
6566
cache_clear?: boolean;
67+
webhook?: string;
6668
}) {
6769
if (options.format && !Object.values(Format).includes(options.format)) {
6870
throw new ScreenshotConfigError(`Invalid format param value: ${options.format}`);
6971
}
7072
this.format = options.format ?? this.format;
7173
// Validate options against the enum
7274
if (options.options) {
73-
options.options.forEach(opt => {
75+
options.options.forEach((opt) => {
7476
if (!Object.values(Options).includes(opt)) {
7577
throw new ScreenshotConfigError(`Invalid options param value: ${opt}`);
7678
}
@@ -90,6 +92,7 @@ export class ScreenshotConfig {
9092
this.cache = options.cache ?? this.cache;
9193
this.cache_ttl = options.cache_ttl ?? this.cache_ttl;
9294
this.cache_clear = options.cache_clear ?? this.cache_clear;
95+
this.webhook = options.webhook;
9396
}
9497

9598
toApiParams(options: { key: string }): Record<string, any> {
@@ -155,6 +158,10 @@ export class ScreenshotConfig {
155158
}
156159
}
157160

161+
if (this.webhook) {
162+
params.webhook_name = this.webhook;
163+
}
164+
158165
return params;
159166
}
160167
}

0 commit comments

Comments
 (0)