From 070654db4d04811d61bce1cb362d2d2fb25d36d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20Compagnon?= Date: Sat, 26 Jul 2025 15:30:59 +0200 Subject: [PATCH] feat(scrape): add advanced scrape options --- src/views/ScrapeView.vue | 131 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/src/views/ScrapeView.vue b/src/views/ScrapeView.vue index 96248bf..ea4d24e 100644 --- a/src/views/ScrapeView.vue +++ b/src/views/ScrapeView.vue @@ -97,6 +97,16 @@ /> Delay before fetching content. +
+ + + Use cached page if younger than this age. +
Remove Base64 Images + +
@@ -164,6 +182,56 @@ Select request location.
+
+ +
+ + + + + + + + + +
+ +
@@ -238,10 +306,14 @@ type ScrapeResult = ScrapeResponse; * @property {number} [timeout] - Page request timeout in milliseconds. * @property {boolean} [blockAds] - Block ads and popups. * @property {boolean} [removeBase64Images] - Remove Base64 encoded images. + * @property {number} [maxAge] - Use cached page if younger than this age in milliseconds. + * @property {boolean} [parsePDF] - Parse PDF files instead of returning base64. + * @property {boolean} [storeInCache] - Store the page in Firecrawl cache. * @property {'basic' | 'stealth' | ''} [proxy] - Proxy type for the request ('basic', 'stealth', or empty for auto). * @property {Record} [headers] - HTTP headers as a JSON object. * @property {string} [action] - HTTP method for the request (e.g., 'GET', 'POST'). * @property {string} [location] - Request location (e.g., 'US', 'EU', 'ASIA'). + * @property {any[]} [actions] - Actions to perform on the page prior to scraping. */ interface FormDataPageOptions { waitFor?: number; @@ -250,10 +322,14 @@ interface FormDataPageOptions { timeout?: number; blockAds?: boolean; removeBase64Images?: boolean; + maxAge?: number; + parsePDF?: boolean; + storeInCache?: boolean; proxy?: 'basic' | 'stealth' | ''; headers?: Record; action?: string; location?: string; + actions?: any[]; } /** @@ -331,12 +407,16 @@ export default defineComponent({ mobile: false, skipTlsVerification: false, timeout: undefined, + maxAge: undefined, blockAds: true, removeBase64Images: true, + parsePDF: true, + storeInCache: true, proxy: '', headers: {}, action: 'GET', // Default HTTP action location: '', + actions: [], }, scrapeOptions: { onlyMainContent: true, @@ -428,6 +508,10 @@ export default defineComponent({ formData.value.pageOptions.waitFor > 0 && { waitFor: formData.value.pageOptions.waitFor, }), + ...(formData.value.pageOptions.maxAge !== undefined && + formData.value.pageOptions.maxAge > 0 && { + maxAge: formData.value.pageOptions.maxAge, + }), ...(formData.value.pageOptions.mobile === true && { 
mobile: true }), ...(formData.value.pageOptions.skipTlsVerification === true && { skipTlsVerification: true, @@ -443,6 +527,10 @@ export default defineComponent({ ...(formData.value.pageOptions.removeBase64Images === false && { removeBase64Images: false, }), + ...(formData.value.pageOptions.parsePDF === false && { parsePDF: false }), + ...(formData.value.pageOptions.storeInCache === false && { + storeInCache: false, + }), ...(formData.value.pageOptions.proxy && formData.value.pageOptions.proxy !== '' && { proxy: formData.value.pageOptions.proxy as 'basic' | 'stealth', @@ -455,6 +543,10 @@ export default defineComponent({ formData.value.pageOptions.location !== '' && { location: { country: formData.value.pageOptions.location }, }), + ...(formData.value.pageOptions.actions && + formData.value.pageOptions.actions.length > 0 && { + actions: formData.value.pageOptions.actions, + }), // Include scrapeOptions properties directly. formats: formData.value.scrapeOptions @@ -628,6 +720,35 @@ export default defineComponent({ } }); + /** + * Action types offered for each entry of the dynamic actions list; the first entry is the default for new actions. + */ + const actionTypes = [ + 'wait', + 'screenshot', + 'click', + 'write', + 'press', + 'scroll', + 'scrape', + 'executeJavascript', + ]; + + /** + * Append a new action of the default type; `actions` is optional, so the list is rebuilt from an empty array when it is unset. + */ + const addAction = (): void => { + formData.value.pageOptions.actions = [...(formData.value.pageOptions.actions ?? []), { type: actionTypes[0] }]; + }; + + /** + * Remove the action at the given index; does nothing when the optional `actions` list is unset. + * @param {number} idx - Index of the action to remove. 
+ */ + const removeAction = (idx: number): void => { + formData.value.pageOptions.actions?.splice(idx, 1); + }; + return { formData, loading, @@ -639,6 +760,9 @@ export default defineComponent({ downloadFormats, extractorOptionsJson, extractorOptionsError, + actionTypes, + addAction, + removeAction, ScrapeAndExtractFromUrlRequestFormatsEnum, isScrapeOptionsCollapsed, isPageOptionsCollapsed, @@ -766,6 +890,13 @@ export default defineComponent({ animation: spin 1s linear infinite; } +.action-item { + display: flex; + align-items: center; + gap: 5px; + margin-bottom: 10px; +} + @keyframes spin { 0% { transform: rotate(0deg);