@@ -3,10 +3,13 @@ import { log } from './logger.js';
33import { Rec , HttpMethod } from './types.js' ;
44import { ScrapeConfigError } from './errors.js' ;
55
// String-literal union of the values accepted as elements of the
// `screenshot_flags` option on ScrapeConfig. The selected flags are joined
// with ',' into a single `screenshot_flags` request parameter, and only when
// `screenshots` is also set; otherwise a warning is logged and they are ignored.
6+ type ScreenshotFlags = "load_images" | "dark_mode" | "block_banners" | "high_quality" | "print_media_format" ;
// String-literal union of the values accepted by the `format` option on
// ScrapeConfig. The chosen value is forwarded as the `format` request
// parameter whenever it is set (the default is null, which sends nothing).
// NOTE(review): "raw" presumably means the content is returned unchanged —
// confirm against the API documentation.
7+ type Format = "raw" | "json" | "text" | "markdown" | "clean_html" ;
8+
69export class ScrapeConfig {
710 static PUBLIC_DATACENTER_POOL = 'public_datacenter_pool' ;
811 static PUBLIC_RESIDENTIAL_POOL = 'public_residential_pool' ;
9-
12+
1013 url : string ;
1114 retry = true ;
1215 method : HttpMethod = 'GET' ;
@@ -24,6 +27,7 @@ export class ScrapeConfig {
2427 proxy_pool ?: string = null ;
2528 session ?: string = null ;
2629 tags : Set < string > = new Set < string > ( ) ;
30+ format ?: Format = null ; // raw(unchanged)
2731 correlation_id ?: string = null ;
2832 cookies ?: Rec < string > = null ;
2933 body ?: string = null ;
@@ -34,6 +38,7 @@ export class ScrapeConfig {
3438 wait_for_selector ?: string = null ;
3539 session_sticky_proxy = false ;
3640 screenshots ?: Rec < any > = null ;
41+ screenshot_flags ?: ScreenshotFlags [ ] = null ;
3742 webhook ?: string = null ;
3843 timeout ?: number = null ; // in milliseconds
3944 js_scenario ?: Rec < any > = null ;
@@ -60,6 +65,7 @@ export class ScrapeConfig {
6065 proxy_pool ?: string ;
6166 session ?: string ;
6267 tags ?: Array < string > ;
68+ format ?: Format ;
6369 correlation_id ?: string ;
6470 cookies ?: Rec < string > ;
6571 body ?: string ;
@@ -69,6 +75,7 @@ export class ScrapeConfig {
6975 rendering_wait ?: number ;
7076 wait_for_selector ?: string ;
7177 screenshots ?: Rec < any > ;
78+ screenshot_flags ?: ScreenshotFlags [ ] ;
7279 session_sticky_proxy ?: boolean ;
7380 webhook ?: string ;
7481 timeout ?: number ; // in milliseconds
@@ -96,6 +103,7 @@ export class ScrapeConfig {
96103 this . proxy_pool = options . proxy_pool ?? this . proxy_pool ;
97104 this . session = options . session ?? this . session ;
98105 this . tags = new Set ( options . tags ) ?? this . tags ;
106+ this . format = options . format ?? this . format ;
99107 this . correlation_id = options . correlation_id ?? this . correlation_id ;
100108 this . cookies = options . cookies
101109 ? Object . fromEntries ( Object . entries ( options . cookies ) . map ( ( [ k , v ] ) => [ k . toLowerCase ( ) , v ] ) )
@@ -106,6 +114,7 @@ export class ScrapeConfig {
106114 this . rendering_wait = options . rendering_wait ?? this . rendering_wait ;
107115 this . wait_for_selector = options . wait_for_selector ?? this . wait_for_selector ;
108116 this . screenshots = options . screenshots ?? this . screenshots ;
117+ this . screenshot_flags = options . screenshot_flags ?? this . screenshot_flags ;
109118 this . webhook = options . webhook ?? this . webhook ;
110119 this . timeout = options . timeout ?? this . timeout ;
111120 this . js_scenario = options . js_scenario ?? this . js_scenario ;
@@ -194,6 +203,13 @@ export class ScrapeConfig {
194203 Object . keys ( this . screenshots ) . forEach ( ( key ) => {
195204 params [ `screenshots[${ key } ]` ] = this . screenshots [ key ] ;
196205 } ) ;
206+ if ( this . screenshot_flags ) {
207+ params . screenshot_flags = this . screenshot_flags . join ( ',' ) ;
208+ }
209+ } else {
210+ if ( this . screenshot_flags ) {
211+ log . warn ( 'Params "screenshot_flags" is ignored. Works only if screenshots is enabled' ) ;
212+ }
197213 }
198214 if ( this . auto_scroll !== null ) {
199215 params . auto_scroll = this . auto_scroll ;
@@ -247,6 +263,9 @@ export class ScrapeConfig {
247263 if ( this . tags . size > 0 ) {
248264 params . tags = Array . from ( this . tags ) . join ( ',' ) ;
249265 }
266+ if ( this . format ) {
267+ params . format = this . format . valueOf ( ) ;
268+ }
250269 if ( this . correlation_id ) {
251270 params . correlation_id = this . correlation_id ;
252271 }
0 commit comments