11import * as errors from '../src/errors.js' ;
22import { ScrapflyClient } from '../src/client.js' ;
33import { ScrapeConfig } from '../src/scrapeconfig.js' ;
4- import { describe , it , expect , beforeEach , jest } from '@jest/globals' ;
54import { ScreenshotConfig } from '../src/screenshotconfig.js' ;
5+ import { ExtractionConfig } from '../src/extractionconfig.js' ;
6+ import { describe , it , expect , beforeEach , jest } from '@jest/globals' ;
67
78function mockedStream ( ) {
89 const mockStream = {
@@ -484,28 +485,28 @@ describe('screenshot', () => {
484485 jest . spyOn ( client , 'fetch' ) . mockClear ( ) ; // clear all mock meta on each test
485486 } ) ;
486487
487- it ( 'succeeds' , async ( ) => {
488- const spy = jest . spyOn ( client , 'fetch' ) ;
489- const url = 'https://web-scraping.dev/' ;
490- jest . spyOn ( client , 'fetch' ) . mockImplementation ( async ( config : Request ) : Promise < any > => {
491- const configUrl = config [ Object . getOwnPropertySymbols ( config ) [ 1 ] ] . url ;
492- // Ensure the URL matches the pattern
493- expect ( configUrl . origin + configUrl . pathname ) . toEqual ( client . HOST + '/screenshot' ) ;
494- expect ( config . method ) . toEqual ( 'GET' ) ;
495- expect ( configUrl . searchParams . get ( 'key' ) ) . toMatch ( KEY ) ;
496- expect ( configUrl . searchParams . get ( 'url' ) ) . toMatch ( url ) ;
497- expect ( Array . from ( configUrl . searchParams . keys ( ) ) ) . toEqual ( [ 'key' , 'url' ] ) ;
498- const body = mockedStream ( ) ;
499- return responseFactory ( body , {
500- status : 200 ,
501- headers : {
502- 'content-encoding' : 'gzip' ,
503- 'content-type' : 'image/png' ,
504- 'x-scrapfly-upstream-http-code' : '200' ,
505- 'x-scrapfly-upstream-url' : url ,
506- } ,
507- } ) ;
488+ it ( 'succeeds' , async ( ) => {
489+ const spy = jest . spyOn ( client , 'fetch' ) ;
490+ const url = 'https://web-scraping.dev/' ;
491+ jest . spyOn ( client , 'fetch' ) . mockImplementation ( async ( config : Request ) : Promise < any > => {
492+ const configUrl = config [ Object . getOwnPropertySymbols ( config ) [ 1 ] ] . url ;
493+ // Ensure the URL matches the pattern
494+ expect ( configUrl . origin + configUrl . pathname ) . toEqual ( client . HOST + '/screenshot' ) ;
495+ expect ( config . method ) . toEqual ( 'GET' ) ;
496+ expect ( configUrl . searchParams . get ( 'key' ) ) . toMatch ( KEY ) ;
497+ expect ( configUrl . searchParams . get ( 'url' ) ) . toMatch ( url ) ;
498+ expect ( Array . from ( configUrl . searchParams . keys ( ) ) ) . toEqual ( [ 'key' , 'url' ] ) ;
499+ const body = mockedStream ( ) ;
500+ return responseFactory ( body , {
501+ status : 200 ,
502+ headers : {
503+ 'content-encoding' : 'gzip' ,
504+ 'content-type' : 'image/png' ,
505+ 'x-scrapfly-upstream-http-code' : '200' ,
506+ 'x-scrapfly-upstream-url' : url ,
507+ } ,
508508 } ) ;
509+ } ) ;
509510
510511 const result = await client . screenshot ( new ScreenshotConfig ( { url : url } ) ) ;
511512 expect ( result ) . toBeDefined ( ) ;
@@ -531,7 +532,7 @@ describe('screenshot', () => {
531532 } ,
532533 } ) ;
533534 } ) ;
534- await expect ( client . screenshot ( new ScreenshotConfig ( { url } ) ) ) . rejects . toThrow ( errors . UnableToTakeScreenshot ) ;
535+ await expect ( client . screenshot ( new ScreenshotConfig ( { url } ) ) ) . rejects . toThrow ( errors . ScreenshotApiError ) ;
535536 expect ( spy ) . toHaveBeenCalledTimes ( 1 ) ;
536537 } ) ;
537538
@@ -551,7 +552,101 @@ describe('screenshot', () => {
551552 } ,
552553 } ) ;
553554 } ) ;
554- await expect ( client . screenshot ( new ScreenshotConfig ( { url } ) ) ) . rejects . toThrow ( errors . ScreenshotInvalidContent ) ;
555+ await expect ( client . screenshot ( new ScreenshotConfig ( { url } ) ) ) . rejects . toThrow ( errors . ScreenshotApiError ) ;
555556 expect ( spy ) . toHaveBeenCalledTimes ( 1 ) ;
556557 } ) ;
557558} ) ;
559+
/**
 * Test suite for `ScrapflyClient.extract`.
 *
 * `client.fetch` is replaced by a Jest spy in every test that reaches the transport layer,
 * so the responses asserted on here are the ones built by `responseFactory` in this file.
 */
describe('extract', () => {
  const KEY = '__API_KEY__';
  const client = new ScrapflyClient({ key: KEY });

  // Document body submitted for extraction by every test in this suite.
  const html = 'very long html file';

  // Builds the plain HTML extraction config shared by the success and API-error tests.
  const htmlConfig = (): ExtractionConfig => new ExtractionConfig({ body: html, content_type: 'text/html' });

  beforeEach(() => {
    // Drop the calls recorded on the fetch spy so per-test call counts start from zero.
    jest.spyOn(client, 'fetch').mockClear();
  });

  it('succeeds', async () => {
    const fetchSpy = jest.spyOn(client, 'fetch');
    fetchSpy.mockImplementation(async (request: Request): Promise<any> => {
      // The request URL and body are read from the object stored under the request's
      // second own symbol property.
      // NOTE(review): this depends on the property order of the runtime's Request
      // implementation — confirm it still holds when upgrading Node.
      const internals = request[Object.getOwnPropertySymbols(request)[1]];
      const requestUrl = internals.url;
      const requestBody = internals.body.source;

      // The call must be a POST to the extraction endpoint, carrying the API key in the
      // query string and the raw document as the request body.
      expect(requestUrl.origin + requestUrl.pathname).toEqual(client.HOST + '/extraction');
      expect(request.method).toEqual('POST');
      expect(requestUrl.searchParams.get('key')).toMatch(KEY);
      expect(requestBody).toEqual(html);

      const payload = { data: 'a document summary', content_type: 'text/html' };
      return responseFactory(payload, { status: 200 });
    });

    const extracted = await client.extract(htmlConfig());

    expect(extracted).toBeDefined();
    expect(extracted.content_type).toBe('text/html');
    expect(extracted.data).toBe('a document summary');
    expect(fetchSpy).toHaveBeenCalledTimes(1);
  });

  it('fails due to failing to invalid config', async () => {
    // Both a saved template name and an inline template are supplied; the test expects
    // this combination to be rejected with ExtractionConfigError.
    // NOTE(review): `epehemeral_template` looks like a misspelling of "ephemeral" — confirm
    // it matches the option name declared by ExtractionConfig before renaming it here.
    const attempt = client.extract(
      new ExtractionConfig({
        body: html,
        content_type: 'text/html',
        epehemeral_template: { source: 'html' },
        template: 'template',
      }),
    );

    await expect(attempt).rejects.toThrow(errors.ExtractionConfigError);
  });

  it('fails to invalid API key', async () => {
    jest.spyOn(client, 'fetch').mockImplementation(async (): Promise<any> => {
      // Stubbed 401 JSON error payload returned in place of a real API response.
      const unauthorized = {
        status: 'error',
        http_code: 401,
        reason: 'Unauthorized',
        error_id: '301e2d9e-b4f5-4289-85ea-e452143338df',
        message: 'Invalid API key',
      };
      return responseFactory(unauthorized, {
        status: 401,
        headers: { 'Content-Type': 'application/json' },
      });
    });

    const attempt = client.extract(htmlConfig());

    await expect(attempt).rejects.toThrow(errors.BadApiKeyError);
  });

  it('fails to any extraction related error', async () => {
    jest.spyOn(client, 'fetch').mockImplementation(async (): Promise<any> => {
      // Stubbed 422 JSON error payload carrying an extraction-specific error code.
      const unsupported = {
        code: 'ERR::EXTRACTION::CONTENT_TYPE_NOT_SUPPORTED',
        error_id: 'f0e9a6af-846a-49ab-8321-e21bb12bf494',
        http_code: 422,
        links: {
          'Related Error Doc':
            'https://scrapfly.io/docs/extraction-api/error/ERR::EXTRACTION::CONTENT_TYPE_NOT_SUPPORTED',
        },
        message: 'ERR::EXTRACTION::CONTENT_TYPE_NOT_SUPPORTED',
      };
      return responseFactory(unsupported, {
        status: 422,
        headers: { 'Content-Type': 'application/json' },
      });
    });

    const attempt = client.extract(htmlConfig());

    await expect(attempt).rejects.toThrow(errors.ExtractionApiError);
  });
});
0 commit comments