Skip to content

Commit 1a86bc5

Browse files
committed
wrap up test coverage to 100%!
1 parent 3b4e767 commit 1a86bc5

File tree

2 files changed

+209 -4 lines changed

2 files changed

+209 -4 lines changed

__tests__/scrapeconfig.test.ts

Lines changed: 202 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,23 @@ describe("scrapeconfig", () => {
3333
expect(config.headers["content-type"]).toBe("application/json");
3434
expect(config.body).toBe("{\"foo\":\"123\",\"bar\":456}");
3535
})
36+
37+
it("POST/PUT/PATCH body defaults as content-type text/plain", async () => {
38+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", "method": "POST", "body": "foo+bar" });
39+
expect(config.headers["content-type"]).toBe("text/plain");
40+
expect(config.body).toBe("foo+bar");
41+
})
42+
it("POST/PUT/PATCH data encodes when formdata content-type is set", async () => {
43+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", "method": "POST", "data": {"foo": 1, "bar": "mojito please"}, "headers": {"content-type": "application/x-www-form-urlencoded"} });
44+
expect(config.headers["content-type"]).toBe("application/x-www-form-urlencoded");
45+
expect(config.body).toBe("foo=1&bar=mojito+please");
46+
})
47+
it("POST/PUT/PATCH data throws when unsupported content-type is set", async () => {
48+
expect(() => {
49+
new ScrapeConfig({ "url": "http://httpbin.dev/get", "method": "POST", "data": {"foo": 1, "bar": "mojito please"}, "headers": {"content-type": "does/not/exist"} });
50+
}).toThrow(ScrapeConfigError);
51+
})
52+
3653
});
3754

3855
describe('config invalid', () => {
@@ -117,9 +134,194 @@ describe("url param generation", () => {
117134
"screenshots[everything]": "fullpage",
118135
});
119136
});
137+
it("asp enables", () => {
138+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", asp: true });
139+
expect(config.toApiParams({ key: "1234" })).toEqual({
140+
"key": "1234",
141+
"url": "http://httpbin.dev/get",
142+
"asp": true,
143+
});
144+
});
145+
it("dns enables", () => {
146+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", dns: true });
147+
expect(config.toApiParams({ key: "1234" })).toEqual({
148+
"key": "1234",
149+
"url": "http://httpbin.dev/get",
150+
"dns": true,
151+
});
152+
});
153+
it("ssl enables", () => {
154+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", ssl: true });
155+
expect(config.toApiParams({ key: "1234" })).toEqual({
156+
"key": "1234",
157+
"url": "http://httpbin.dev/get",
158+
"ssl": true,
159+
});
160+
});
161+
it("tags set", () => {
162+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", tags: ["foo", "bar", "gaz"] });
163+
expect(config.toApiParams({ key: "1234" })).toEqual({
164+
"key": "1234",
165+
"url": "http://httpbin.dev/get",
166+
"tags": "foo,bar,gaz",
167+
});
168+
});
169+
it("debug sets", () => {
170+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", debug: true});
171+
expect(config.toApiParams({ key: "1234" })).toEqual({
172+
"key": "1234",
173+
"url": "http://httpbin.dev/get",
174+
"debug": true,
175+
});
176+
});
177+
it("lang sets", () => {
178+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", lang: ["en", "fr", "lt"]});
179+
expect(config.toApiParams({ key: "1234" })).toEqual({
180+
"key": "1234",
181+
"url": "http://httpbin.dev/get",
182+
"lang": "en,fr,lt",
183+
});
184+
});
185+
it("os sets", () => {
186+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", os: "linux"});
187+
expect(config.toApiParams({ key: "1234" })).toEqual({
188+
"key": "1234",
189+
"url": "http://httpbin.dev/get",
190+
"os": "linux",
191+
});
192+
});
193+
it("proxy_pool sets", () => {
194+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", proxy_pool: "public_residential_pool"});
195+
expect(config.toApiParams({ key: "1234" })).toEqual({
196+
"key": "1234",
197+
"url": "http://httpbin.dev/get",
198+
"proxy_pool": "public_residential_pool",
199+
});
200+
});
201+
it("session sets", () => {
202+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", session: "foo123"});
203+
expect(config.toApiParams({ key: "1234" })).toEqual({
204+
"key": "1234",
205+
"url": "http://httpbin.dev/get",
206+
"session": "foo123",
207+
});
208+
});
209+
it("session_sticky_proxy sets", () => {
210+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", session: "foo123", "session_sticky_proxy": true});
211+
expect(config.toApiParams({ key: "1234" })).toEqual({
212+
"key": "1234",
213+
"url": "http://httpbin.dev/get",
214+
"session": "foo123",
215+
"session_sticky_proxy": true,
216+
});
217+
});
218+
it("session_sticky_proxy ignored with no session", () => {
219+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", "session_sticky_proxy": true});
220+
expect(config.toApiParams({ key: "1234" })).toEqual({
221+
"key": "1234",
222+
"url": "http://httpbin.dev/get",
223+
});
224+
});
120225

226+
it("correlation id sets", () => {
227+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", correlation_id: "1234"});
228+
expect(config.toApiParams({ key: "1234" })).toEqual({
229+
"key": "1234",
230+
"url": "http://httpbin.dev/get",
231+
"correlation_id": "1234",
232+
});
233+
});
234+
it("webhook enables", () => {
235+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", webhook: "snailmail" });
236+
expect(config.toApiParams({ key: "1234" })).toEqual({
237+
"key": "1234",
238+
"url": "http://httpbin.dev/get",
239+
"webhook_name": "snailmail",
240+
});
241+
});
121242

243+
it("timeout enables", () => {
244+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", timeout: 10 });
245+
expect(config.toApiParams({ key: "1234" })).toEqual({
246+
"key": "1234",
247+
"url": "http://httpbin.dev/get",
248+
"timeout": 10,
249+
});
250+
});
251+
it("retry disables", () => {
252+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", retry: false });
253+
expect(config.toApiParams({ key: "1234" })).toEqual({
254+
"key": "1234",
255+
"url": "http://httpbin.dev/get",
256+
"retry": false,
257+
});
258+
});
259+
it("cache enables", () => {
260+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", cache: true, cache_ttl: 60, cache_clear: true });
261+
expect(config.toApiParams({ key: "1234" })).toEqual({
262+
"key": "1234",
263+
"url": "http://httpbin.dev/get",
264+
"cache": true,
265+
"cache_ttl": 60,
266+
"cache_clear": true,
267+
});
268+
});
122269

270+
it("auto_scroll enables", () => {
271+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", auto_scroll: true, render_js: true });
272+
expect(config.toApiParams({ key: "1234" })).toEqual({
273+
"key": "1234",
274+
"url": "http://httpbin.dev/get",
275+
"auto_scroll": true,
276+
"render_js": true,
277+
});
278+
});
279+
it("wait_for_selector sets", () => {
280+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", wait_for_selector: "#foo", render_js: true });
281+
expect(config.toApiParams({ key: "1234" })).toEqual({
282+
"key": "1234",
283+
"url": "http://httpbin.dev/get",
284+
"wait_for_selector": "#foo",
285+
"render_js": true,
286+
});
287+
});
288+
it("rendering_wait sets", () => {
289+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", rendering_wait: 10, render_js: true });
290+
expect(config.toApiParams({ key: "1234" })).toEqual({
291+
"key": "1234",
292+
"url": "http://httpbin.dev/get",
293+
"rendering_wait": 10,
294+
"render_js": true,
295+
});
296+
});
297+
it("render_js optionals ignored when disabled", () => {
298+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", wait_for_selector: ".foo", screenshots: { "all": "fullpage" }, js_scenario: [], js: "", rendering_wait: 10 });
299+
expect(config.toApiParams({ key: "1234" })).toEqual({
300+
"key": "1234",
301+
"url": "http://httpbin.dev/get",
302+
});
303+
});
304+
305+
306+
it("cache args are ignored when cache disabled", () => {
307+
const config = new ScrapeConfig({ "url": "http://httpbin.dev/get", cache: false, cache_ttl: 60, cache_clear: true });
308+
expect(config.toApiParams({ key: "1234" })).toEqual({
309+
"key": "1234",
310+
"url": "http://httpbin.dev/get",
311+
});
312+
});
313+
314+
315+
it("js encodes", () => {
316+
const code = 'return document.querySelectorAll(".review p").map(p=>p.outerText))'
317+
const config = new ScrapeConfig({ url: "https://web-scraping.dev/product/1", js: code, render_js: true });
318+
expect(config.toApiParams({ "key": "1234" })).toEqual({
319+
url: "https://web-scraping.dev/product/1",
320+
key: "1234",
321+
render_js: true,
322+
js: "cmV0dXJuIGRvY3VtZW50LnF1ZXJ5U2VsZWN0b3JBbGwoIi5yZXZpZXcgcCIpLm1hcChwPT5wLm91dGVyVGV4dCkp",
323+
});
324+
})
123325
it("js scenario encodes", () => {
124326
const scenario = [
125327
{ "wait_for_selector": { "selector": ".review" } },

src/scrapeconfig.ts

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ export class ScrapeConfig {
6060
cache_ttl?: number;
6161
proxy_pool?: string;
6262
session?: string;
63-
tags?: Set<string>;
63+
tags?: Array<string>;
6464
correlation_id?: string;
6565
cookies?: Rec<string>;
6666
body?: string;
@@ -93,7 +93,7 @@ export class ScrapeConfig {
9393
this.cache_ttl = options.cache_ttl ?? this.cache_ttl;
9494
this.proxy_pool = options.proxy_pool ?? this.proxy_pool;
9595
this.session = options.session ?? this.session;
96-
this.tags = options.tags ?? this.tags;
96+
this.tags = new Set(options.tags) ?? this.tags;
9797
this.correlation_id = options.correlation_id ?? this.correlation_id;
9898
this.cookies = options.cookies ? Object.fromEntries(Object.entries(options.cookies).map(([k, v]) => [k.toLowerCase(), v])) : {};
9999
this.body = options.body ?? this.body;
@@ -109,6 +109,9 @@ export class ScrapeConfig {
109109
this.os = options.os ?? this.os;
110110
this.lang = options.lang ?? this.lang;
111111
this.auto_scroll = options.auto_scroll ?? this.auto_scroll;
112+
this.dns = options.dns ?? this.dns;
113+
this.ssl = options.ssl ?? this.ssl;
114+
this.debug = options.debug ?? this.debug;
112115
if (this.body && this.data) {
113116
throw new ScrapeConfigError("Cannot set both body and data");
114117
}
@@ -217,7 +220,7 @@ export class ScrapeConfig {
217220
params.cache_clear = true;
218221
}
219222
if (this.cache_ttl) {
220-
params.cache_clear = true;
223+
params.cache_ttl = this.cache_ttl;
221224
}
222225
} else {
223226
if (this.cache_clear) {
@@ -249,7 +252,7 @@ export class ScrapeConfig {
249252
log.warn('Params "session_sticky_proxy" is ignored. Works only if session is enabled');
250253
}
251254
}
252-
if (this.debug) {
255+
if (this.debug === true) {
253256
params.debug = true;
254257
}
255258
if (this.proxy_pool) {

0 commit comments

Comments
 (0)