@@ -33,6 +33,23 @@ describe("scrapeconfig", () => {
3333 expect ( config . headers [ "content-type" ] ) . toBe ( "application/json" ) ;
3434 expect ( config . body ) . toBe ( "{\"foo\":\"123\",\"bar\":456}" ) ;
3535 } )
36+
// A raw string body sent with POST and no explicit headers is expected to be
// tagged as "text/plain" and passed through unchanged.
it("POST/PUT/PATCH body defaults as content-type text/plain", async () => {
  const options = {
    url: "http://httpbin.dev/get",
    method: "POST",
    body: "foo+bar",
  };
  const scrapeConfig = new ScrapeConfig(options);

  const contentType = scrapeConfig.headers["content-type"];
  expect(contentType).toBe("text/plain");
  expect(scrapeConfig.body).toBe("foo+bar");
});
// With a form-urlencoded content-type header, the `data` object is expected
// to be serialized into the body as a query string (spaces become "+").
it("POST/PUT/PATCH data encodes when formdata content-type is set", async () => {
  const options = {
    url: "http://httpbin.dev/get",
    method: "POST",
    data: { foo: 1, bar: "mojito please" },
    headers: { "content-type": "application/x-www-form-urlencoded" },
  };
  const scrapeConfig = new ScrapeConfig(options);

  const contentType = scrapeConfig.headers["content-type"];
  expect(contentType).toBe("application/x-www-form-urlencoded");
  expect(scrapeConfig.body).toBe("foo=1&bar=mojito+please");
});
// Supplying `data` together with a content-type the config cannot encode is
// expected to fail at construction time with a ScrapeConfigError.
it("POST/PUT/PATCH data throws when unsupported content-type is set", async () => {
  const options = {
    url: "http://httpbin.dev/get",
    method: "POST",
    data: { foo: 1, bar: "mojito please" },
    headers: { "content-type": "does/not/exist" },
  };
  const construct = () => {
    new ScrapeConfig(options);
  };

  expect(construct).toThrow(ScrapeConfigError);
});
52+
3653} ) ;
3754
3855describe ( 'config invalid' , ( ) => {
@@ -117,9 +134,194 @@ describe("url param generation", () => {
117134 "screenshots[everything]" : "fullpage" ,
118135 } ) ;
119136 } ) ;
// `asp: true` is expected to appear verbatim in the generated API params.
it("asp enables", () => {
  const scrapeConfig = new ScrapeConfig({ url: "http://httpbin.dev/get", asp: true });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    asp: true,
  });
});
// `dns: true` is expected to appear verbatim in the generated API params.
it("dns enables", () => {
  const scrapeConfig = new ScrapeConfig({ url: "http://httpbin.dev/get", dns: true });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    dns: true,
  });
});
// `ssl: true` is expected to appear verbatim in the generated API params.
it("ssl enables", () => {
  const scrapeConfig = new ScrapeConfig({ url: "http://httpbin.dev/get", ssl: true });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    ssl: true,
  });
});
// A `tags` array is expected to be flattened into one comma-joined string.
it("tags set", () => {
  const tagList = ["foo", "bar", "gaz"];
  const scrapeConfig = new ScrapeConfig({ url: "http://httpbin.dev/get", tags: tagList });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    tags: "foo,bar,gaz",
  });
});
// `debug: true` is expected to appear verbatim in the generated API params.
it("debug sets", () => {
  const scrapeConfig = new ScrapeConfig({ url: "http://httpbin.dev/get", debug: true });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    debug: true,
  });
});
// A `lang` array is expected to be flattened into one comma-joined string.
it("lang sets", () => {
  const languages = ["en", "fr", "lt"];
  const scrapeConfig = new ScrapeConfig({ url: "http://httpbin.dev/get", lang: languages });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    lang: "en,fr,lt",
  });
});
// The `os` string is expected to be forwarded unchanged to the API params.
it("os sets", () => {
  const scrapeConfig = new ScrapeConfig({ url: "http://httpbin.dev/get", os: "linux" });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    os: "linux",
  });
});
// The `proxy_pool` name is expected to be forwarded unchanged to the API params.
it("proxy_pool sets", () => {
  const scrapeConfig = new ScrapeConfig({
    url: "http://httpbin.dev/get",
    proxy_pool: "public_residential_pool",
  });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    proxy_pool: "public_residential_pool",
  });
});
// The `session` name is expected to be forwarded unchanged to the API params.
it("session sets", () => {
  const scrapeConfig = new ScrapeConfig({ url: "http://httpbin.dev/get", session: "foo123" });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    session: "foo123",
  });
});
// When a session is present, `session_sticky_proxy: true` is expected to be
// emitted alongside it.
it("session_sticky_proxy sets", () => {
  const scrapeConfig = new ScrapeConfig({
    url: "http://httpbin.dev/get",
    session: "foo123",
    session_sticky_proxy: true,
  });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    session: "foo123",
    session_sticky_proxy: true,
  });
});
// Without a session, `session_sticky_proxy` is expected to be dropped: only
// the key and url remain in the generated params.
it("session_sticky_proxy ignored with no session", () => {
  const scrapeConfig = new ScrapeConfig({
    url: "http://httpbin.dev/get",
    session_sticky_proxy: true,
  });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
  });
});
120225
// The `correlation_id` string is expected to be forwarded unchanged.
it("correlation id sets", () => {
  const scrapeConfig = new ScrapeConfig({
    url: "http://httpbin.dev/get",
    correlation_id: "1234",
  });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    correlation_id: "1234",
  });
});
// The `webhook` option is expected to be emitted under the API parameter
// name `webhook_name`.
it("webhook enables", () => {
  const scrapeConfig = new ScrapeConfig({ url: "http://httpbin.dev/get", webhook: "snailmail" });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    webhook_name: "snailmail",
  });
});
121242
// The numeric `timeout` is expected to be forwarded unchanged.
it("timeout enables", () => {
  const scrapeConfig = new ScrapeConfig({ url: "http://httpbin.dev/get", timeout: 10 });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    timeout: 10,
  });
});
// An explicit `retry: false` is expected to be emitted, not omitted.
it("retry disables", () => {
  const scrapeConfig = new ScrapeConfig({ url: "http://httpbin.dev/get", retry: false });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    retry: false,
  });
});
// With caching on, `cache`, `cache_ttl` and `cache_clear` are all expected
// to be forwarded to the API params.
it("cache enables", () => {
  const scrapeConfig = new ScrapeConfig({
    url: "http://httpbin.dev/get",
    cache: true,
    cache_ttl: 60,
    cache_clear: true,
  });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    cache: true,
    cache_ttl: 60,
    cache_clear: true,
  });
});
122269
// With JS rendering on, `auto_scroll: true` is expected to be emitted
// together with `render_js`.
it("auto_scroll enables", () => {
  const scrapeConfig = new ScrapeConfig({
    url: "http://httpbin.dev/get",
    auto_scroll: true,
    render_js: true,
  });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    auto_scroll: true,
    render_js: true,
  });
});
// With JS rendering on, the `wait_for_selector` string is expected to be
// forwarded unchanged together with `render_js`.
it("wait_for_selector sets", () => {
  const scrapeConfig = new ScrapeConfig({
    url: "http://httpbin.dev/get",
    wait_for_selector: "#foo",
    render_js: true,
  });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    wait_for_selector: "#foo",
    render_js: true,
  });
});
// With JS rendering on, the numeric `rendering_wait` is expected to be
// forwarded unchanged together with `render_js`.
it("rendering_wait sets", () => {
  const scrapeConfig = new ScrapeConfig({
    url: "http://httpbin.dev/get",
    rendering_wait: 10,
    render_js: true,
  });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
    rendering_wait: 10,
    render_js: true,
  });
});
// Options that only matter for JS rendering are expected to be dropped when
// `render_js` is not set: only the key and url remain.
// NOTE(review): `js` and `js_scenario` are empty here, so this case may not
// prove that non-empty values are also dropped — confirm against ScrapeConfig.
it("render_js optionals ignored when disabled", () => {
  const options = {
    url: "http://httpbin.dev/get",
    wait_for_selector: ".foo",
    screenshots: { all: "fullpage" },
    js_scenario: [],
    js: "",
    rendering_wait: 10,
  };
  const apiParams = new ScrapeConfig(options).toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
  });
});
304+
305+
// With `cache: false`, neither the cache flag nor `cache_ttl` / `cache_clear`
// is expected in the generated params.
it("cache args are ignored when cache disabled", () => {
  const options = {
    url: "http://httpbin.dev/get",
    cache: false,
    cache_ttl: 60,
    cache_clear: true,
  };
  const apiParams = new ScrapeConfig(options).toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    key: "1234",
    url: "http://httpbin.dev/get",
  });
});
313+
314+
// The `js` snippet is expected to be emitted as a base64 string when JS
// rendering is enabled.
// NOTE(review): the snippet ends with an unbalanced ")" and the expected
// base64 value encodes it as-is; it is only used as an opaque payload here.
it("js encodes", () => {
  const snippet = 'return document.querySelectorAll(".review p").map(p=>p.outerText))';
  const encodedSnippet =
    "cmV0dXJuIGRvY3VtZW50LnF1ZXJ5U2VsZWN0b3JBbGwoIi5yZXZpZXcgcCIpLm1hcChwPT5wLm91dGVyVGV4dCkp";
  const scrapeConfig = new ScrapeConfig({
    url: "https://web-scraping.dev/product/1",
    js: snippet,
    render_js: true,
  });
  const apiParams = scrapeConfig.toApiParams({ key: "1234" });

  expect(apiParams).toEqual({
    url: "https://web-scraping.dev/product/1",
    key: "1234",
    render_js: true,
    js: encodedSnippet,
  });
});
123325 it ( "js scenario encodes" , ( ) => {
124326 const scenario = [
125327 { "wait_for_selector" : { "selector" : ".review" } } ,
0 commit comments