1+ import * as errors from '../../src/errors.js' ;
2+ import { ScrapflyClient } from '../../src/client.js' ;
3+ import { ExtractionConfig } from '../../src/extractionconfig.js'
4+ import { describe , it , expect , beforeEach , jest } from '@jest/globals' ;
5+ import { responseFactory } from '../utils.js' ;
6+
/**
 * Tests for `ScrapflyClient.extract`.
 *
 * `client.fetch` is replaced by a Jest spy in every test, and each test that
 * expects a request installs the canned response it needs.
 */
describe('extract', () => {
  const KEY = '__API_KEY__';
  const HTML = 'very long html file';
  const client = new ScrapflyClient({ key: KEY });

  /** Runs an extraction of `HTML` declared as `text/html` through the client under test. */
  const extractHtml = () => client.extract(new ExtractionConfig({ body: HTML, content_type: 'text/html' }));

  beforeEach(() => {
    // mockReset (unlike mockClear) also drops the implementation installed by a
    // previous test, so no test can silently depend on another test's response.
    jest.spyOn(client, 'fetch').mockReset();
  });

  it('succeeds', async () => {
    // Requests handed to the mocked fetch, inspected after the call completes so
    // that a failed expectation is reported by the test itself rather than
    // surfacing as a rejection inside client.extract().
    const sentRequests: Request[] = [];
    const fetchSpy = jest.spyOn(client, 'fetch').mockImplementation(async (config: Request): Promise<any> => {
      sentRequests.push(config);
      return responseFactory({ data: 'a document summary', content_type: 'text/html' }, { status: 200 });
    });

    const result = await extractHtml();

    expect(fetchSpy).toHaveBeenCalledTimes(1);
    const [request] = sentRequests;

    // Inspect the request through the public Request API instead of reaching
    // into the runtime's internal symbol-keyed state.
    const requestUrl = new URL(request.url);
    expect(requestUrl.origin + requestUrl.pathname).toEqual(client.HOST + '/extraction');
    expect(request.method).toEqual('POST');
    // Exact comparison: toMatch() with a string only checks for a substring.
    expect(requestUrl.searchParams.get('key')).toBe(KEY);
    // The mock never consumed the body, so it can still be read here.
    expect(await request.text()).toEqual(HTML);

    expect(result).toBeDefined();
    expect(result.content_type).toBe('text/html');
    expect(result.data).toBe('a document summary');
  });

  it('fails due to failing to invalid config', async () => {
    // `template` and `ephemeral_template` are both supplied; the client is
    // expected to reject this combination with ExtractionConfigError.
    await expect(
      client.extract(
        new ExtractionConfig({
          body: HTML,
          content_type: 'text/html',
          ephemeral_template: { source: 'html' },
          template: 'template',
        }),
      ),
    ).rejects.toThrow(errors.ExtractionConfigError);
  });

  it('fails to invalid API key', async () => {
    // Canned 401 JSON error payload returned by the mocked fetch.
    const unauthorized = {
      status: 'error',
      http_code: 401,
      reason: 'Unauthorized',
      error_id: '301e2d9e-b4f5-4289-85ea-e452143338df',
      message: 'Invalid API key',
    };
    jest.spyOn(client, 'fetch').mockImplementation(
      async (): Promise<any> =>
        responseFactory(unauthorized, {
          status: 401,
          headers: {
            'Content-Type': 'application/json',
          },
        }),
    );

    await expect(extractHtml()).rejects.toThrow(errors.BadApiKeyError);
  });

  it('fails to any extraction related error', async () => {
    // Canned 422 JSON error payload carrying an ERR::EXTRACTION::* code.
    const unsupportedContentType = {
      code: 'ERR::EXTRACTION::CONTENT_TYPE_NOT_SUPPORTED',
      error_id: 'f0e9a6af-846a-49ab-8321-e21bb12bf494',
      http_code: 422,
      links: {
        'Related Error Doc':
          'https://scrapfly.io/docs/extraction-api/error/ERR::EXTRACTION::CONTENT_TYPE_NOT_SUPPORTED',
      },
      message: 'ERR::EXTRACTION::CONTENT_TYPE_NOT_SUPPORTED',
    };
    jest.spyOn(client, 'fetch').mockImplementation(
      async (): Promise<any> =>
        responseFactory(unsupportedContentType, {
          status: 422,
          headers: {
            'Content-Type': 'application/json',
          },
        }),
    );

    await expect(extractHtml()).rejects.toThrow(errors.ExtractionApiError);
  });
});
0 commit comments