From bfe0e8887619b16b60531bd5f8105b74d94324b5 Mon Sep 17 00:00:00 2001 From: Ali Serag Date: Fri, 21 Feb 2025 11:32:57 -0800 Subject: [PATCH 1/5] improve sitemap settings so crawling is more effective --- docsearch.config.json | 26 +++++++++++--------------- docusaurus.config.js | 42 ++++++++++++++++++++++++++++++++---------- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/docsearch.config.json b/docsearch.config.json index de0c11dd98..bc55497248 100644 --- a/docsearch.config.json +++ b/docsearch.config.json @@ -1,15 +1,14 @@ { "index_name": "flow_docs", "start_urls": [ - "https://developers.flow.com/" - ], - "sitemap_urls": [ - "https://developers.flow.com/sitemap.xml" + { + "url": "https://developers.flow.com/", + "tags": ["docs"] + } ], + "sitemap_urls": ["https://developers.flow.com/sitemap.xml"], "sitemap_alternate_links": true, - "stop_urls": [ - "/tests" - ], + "stop_urls": ["/tests"], "selectors": { "lvl0": { "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]", @@ -28,12 +27,7 @@ "strip_chars": " .,;:#", "custom_settings": { "separatorsToIndex": "_", - "attributesForFaceting": [ - "language", - "version", - "type", - "docusaurus_tag" - ], + "attributesForFaceting": ["language", "version", "type", "docusaurus_tag"], "attributesToRetrieve": [ "hierarchy", "content", @@ -42,5 +36,7 @@ "url_without_anchor", "type" ] - } -} \ No newline at end of file + }, + "js_render": true, + "js_wait": 1 +} diff --git a/docusaurus.config.js b/docusaurus.config.js index 7f0da7cfcd..c8b38eef81 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -153,7 +153,7 @@ const config = { url: getUrl(), // Set the // pathname under which your site is served // For GitHub pages deployment, it is often '//' - baseUrl, + baseUrl: baseUrl, // GitHub pages deployment config. 
// If you aren't using GitHub pages, you don't need these. @@ -205,11 +205,11 @@ const config = { }, ...(process.env.GTAG ? { - gtag: { - trackingID: process.env.GTAG, - anonymizeIP: true, - }, - } + gtag: { + trackingID: process.env.GTAG, + anonymizeIP: true, + }, + } : {}), }), ], @@ -244,14 +244,27 @@ const config = { image: 'img/flow-docs-og-1200-630.png', metadata: [ { name: 'twitter:card', content: 'summary_large_image' }, - { name: 'twitter:image', content: getUrl() + '/img/flow-docs-og-1200-630.png' }, - { property: 'og:image', content: getUrl() + '/img/flow-docs-og-1200-630.png' }, + { + name: 'twitter:image', + content: getUrl() + '/img/flow-docs-og-1200-630.png', + }, + { + property: 'og:image', + content: getUrl() + '/img/flow-docs-og-1200-630.png', + }, { property: 'og:image:type', content: 'image/png' }, { property: 'og:image:width', content: '1200' }, { property: 'og:image:height', content: '630' }, { property: 'og:type', content: 'website' }, - { property: 'og:description', content: 'Flow Developer Documentation - The future of culture and digital assets is built on Flow' }, - { property: 'og:logo', content: getUrl() + '/img/flow-docs-logo-light.png' }, + { + property: 'og:description', + content: + 'Flow Developer Documentation - The future of culture and digital assets is built on Flow', + }, + { + property: 'og:logo', + content: getUrl() + '/img/flow-docs-logo-light.png', + }, ], docs: { sidebar: { @@ -656,6 +669,15 @@ const config = { }, }; }, + [ + '@docusaurus/plugin-sitemap', + { + changefreq: 'daily', + priority: 0.5, + ignorePatterns: ['/tags/**'], + sitemapSize: 5000, + }, + ], ], stylesheets: [ { From d9031b0e90b734065100f849f758aa35e8c9fe56 Mon Sep 17 00:00:00 2001 From: Ali Serag Date: Fri, 21 Feb 2025 11:38:29 -0800 Subject: [PATCH 2/5] fix build issues --- docusaurus.config.js | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/docusaurus.config.js b/docusaurus.config.js index c8b38eef81..3a32417164 100644 
--- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -203,6 +203,11 @@ const config = { require.resolve('./src/css/custom.css'), ], }, + sitemap: { + changefreq: 'daily', + priority: 0.5, + ignorePatterns: ['/tags/**'], + }, ...(process.env.GTAG ? { gtag: { @@ -669,15 +674,6 @@ const config = { }, }; }, - [ - '@docusaurus/plugin-sitemap', - { - changefreq: 'daily', - priority: 0.5, - ignorePatterns: ['/tags/**'], - sitemapSize: 5000, - }, - ], ], stylesheets: [ { From 2ed5cae793df7a86ace7c539ac42963b88e97277 Mon Sep 17 00:00:00 2001 From: Ali Serag Date: Fri, 21 Feb 2025 11:40:49 -0800 Subject: [PATCH 3/5] set different priorities to various potential startpaths that can be crawled --- docsearch.config.json | 54 +++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/docsearch.config.json b/docsearch.config.json index bc55497248..a78f0d1a26 100644 --- a/docsearch.config.json +++ b/docsearch.config.json @@ -3,26 +3,48 @@ "start_urls": [ { "url": "https://developers.flow.com/", - "tags": ["docs"] + "selectors_key": "default", + "tags": ["docs"], + "page_rank": 1 + }, + { + "url": "https://developers.flow.com/build/", + "selectors_key": "default", + "tags": ["build"], + "page_rank": 2 + }, + { + "url": "https://developers.flow.com/tools/", + "selectors_key": "default", + "tags": ["tools"], + "page_rank": 2 + }, + { + "url": "https://developers.flow.com/ecosystem/", + "selectors_key": "default", + "tags": ["ecosystem"], + "page_rank": 2 } ], "sitemap_urls": ["https://developers.flow.com/sitemap.xml"], "sitemap_alternate_links": true, "stop_urls": ["/tests"], "selectors": { - "lvl0": { - "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]", - "type": "xpath", - "global": true, - "default_value": "Documentation" - }, - "lvl1": "article h1, 
header h1", - "lvl2": "article h2", - "lvl3": "article h3", - "lvl4": "article h4", - "lvl5": "article h5, article td:first-child", - "lvl6": "article h6", - "text": "article p, article li, article td:last-child" + "default": { + "lvl0": { + "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]", + "type": "xpath", + "global": true, + "default_value": "Documentation" + }, + "lvl1": "article h1, header h1", + "lvl2": "article h2", + "lvl3": "article h3", + "lvl4": "article h4", + "lvl5": "article h5, article td:first-child", + "lvl6": "article h6", + "text": "article p, article li, article td:last-child" + } }, "strip_chars": " .,;:#", "custom_settings": { @@ -38,5 +60,7 @@ ] }, "js_render": true, - "js_wait": 1 + "js_wait": 2, + "use_anchors": true, + "scrape_start_urls": true } From e9c7ae5ac8a865d42b594c628268900e10112f72 Mon Sep 17 00:00:00 2001 From: Ali Serag Date: Fri, 21 Feb 2025 11:52:19 -0800 Subject: [PATCH 4/5] Include all content. Setting min_indexed_level to 0 should make it index everything from the top-level navigation down to the smallest headings.
--- docsearch.config.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docsearch.config.json b/docsearch.config.json index a78f0d1a26..b658f2888a 100644 --- a/docsearch.config.json +++ b/docsearch.config.json @@ -62,5 +62,6 @@ "js_render": true, "js_wait": 2, "use_anchors": true, - "scrape_start_urls": true + "scrape_start_urls": true, + "min_indexed_level": 0 } From e4b7977b89c52d268dc0d1c7941456b32a2aecb5 Mon Sep 17 00:00:00 2001 From: Ali Serag Date: Fri, 21 Feb 2025 12:26:36 -0800 Subject: [PATCH 5/5] Remove note on investigating indexing and add homepage explicit selectors --- docs/tools/vscode-extension/cursor/index.md | 1 - docsearch.config.json | 34 ++++++++++++++++++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/docs/tools/vscode-extension/cursor/index.md b/docs/tools/vscode-extension/cursor/index.md index 8c4f0bb77b..6990ca6c00 100644 --- a/docs/tools/vscode-extension/cursor/index.md +++ b/docs/tools/vscode-extension/cursor/index.md @@ -17,7 +17,6 @@ Adding Flow docs lets you interact with our docs directly and get the most accur ![Cursor Settings](./images//use-cursor-1.png) 2. Set Flow Docs: - Enter the URL of the Flow docs: `https://developers.flow.com/tools` and press Enter. - - Note: This **will index all** the docs. We're investigating why you need `/tools` - Cursor will automatically detect the Flow docs and index them for you. - Ensure the name is `Flow`, and click "Confirm" to add the docs. 
![Cursor Settings](./images//use-cursor-2.png) diff --git a/docsearch.config.json b/docsearch.config.json index b658f2888a..b95c15970c 100644 --- a/docsearch.config.json +++ b/docsearch.config.json @@ -3,8 +3,8 @@ "start_urls": [ { "url": "https://developers.flow.com/", - "selectors_key": "default", - "tags": ["docs"], + "selectors_key": "homepage", + "tags": ["homepage"], "page_rank": 1 }, { @@ -44,12 +44,33 @@ "lvl5": "article h5, article td:first-child", "lvl6": "article h6", "text": "article p, article li, article td:last-child" + }, + "homepage": { + "lvl0": { + "selector": "nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]", + "type": "xpath", + "global": true, + "default_value": "Flow Developer Portal" + }, + "lvl1": "h1, header h1, .hero__title, main h1", + "lvl2": "h2, .hero__subtitle, .cards h3, main h2", + "lvl3": "h3, .cards h4, main h3", + "lvl4": "h4, main h4", + "lvl5": "h5", + "lvl6": "h6", + "text": "p, li, td, .hero__subtitle, .cards p, main p" } }, "strip_chars": " .,;:#", "custom_settings": { "separatorsToIndex": "_", - "attributesForFaceting": ["language", "version", "type", "docusaurus_tag"], + "attributesForFaceting": [ + "language", + "version", + "type", + "docusaurus_tag", + "tags" + ], "attributesToRetrieve": [ "hierarchy", "content", @@ -57,11 +78,14 @@ "url", "url_without_anchor", "type" - ] + ], + "distinct": true, + "attributeForDistinct": "url" }, "js_render": true, "js_wait": 2, "use_anchors": true, "scrape_start_urls": true, - "min_indexed_level": 0 + "min_indexed_level": 0, + "only_content_level": false }