diff --git a/.github/workflows/deploy_to_production.yml b/.github/workflows/deploy_to_production.yml
index bf1c0f8..18bfd05 100644
--- a/.github/workflows/deploy_to_production.yml
+++ b/.github/workflows/deploy_to_production.yml
@@ -22,10 +22,10 @@ jobs:
           ref: master
           submodules: true
 
-      - name: Set up Node.js and Yarn
+      - name: Set up Node.js
         uses: actions/setup-node@v3
         with:
-          node-version: 20
+          node-version: '20.11.1'
 
       - name: Install Dependencies
         run: |
@@ -43,4 +43,12 @@ jobs:
           set -e
           npm -g install @platformos/pos-cli
           pos-cli deploy
+          # Quote each secret so the shell passes it as a single argument (no word splitting or globbing).
+          pos-cli constants set --name modules/openai/OPENAI_REINDEX_TOKEN --value "${{ secrets.OPENAI_REINDEX_TOKEN }}"
+          pos-cli constants set --name modules/openai/OPENAI_SECRET_TOKEN --value "${{ secrets.OPENAI_SECRET_TOKEN }}"
 
+      - name: Search reindex
+        uses: fjogeleit/http-request-action@v1
+        with:
+          url: '${{ secrets.MPKIT_PROD_URL }}_embeddings/reindex.json?token=${{ secrets.OPENAI_REINDEX_TOKEN }}'
+          method: 'GET'
diff --git a/.github/workflows/deploy_to_qa.yml b/.github/workflows/deploy_to_qa.yml
index f67dddf..ccc6842 100644
--- a/.github/workflows/deploy_to_qa.yml
+++ b/.github/workflows/deploy_to_qa.yml
@@ -24,10 +24,10 @@ jobs:
           ref: master
           submodules: true
 
-      - name: Set up Node.js and Yarn
+      - name: Set up Node.js
         uses: actions/setup-node@v3
         with:
-          node-version: 20
+          node-version: '20.11.1'
 
       - name: Install Dependencies
         run: |
@@ -45,3 +45,12 @@ jobs:
           set -e
           npm -g install @platformos/pos-cli
           pos-cli deploy
+          # Quote each secret so the shell passes it as a single argument (no word splitting or globbing).
+          pos-cli constants set --name modules/openai/OPENAI_REINDEX_TOKEN --value "${{ secrets.OPENAI_REINDEX_TOKEN }}"
+          pos-cli constants set --name modules/openai/OPENAI_SECRET_TOKEN --value "${{ secrets.OPENAI_SECRET_TOKEN }}"
+
+      - name: Search reindex
+        uses: fjogeleit/http-request-action@v1
+        with:
+          url: '${{ secrets.MPKIT_QA_URL }}_embeddings/reindex.json?token=${{ secrets.OPENAI_REINDEX_TOKEN }}'
+          method: 'GET'
diff --git a/.github/workflows/preview.yml b/.github/workflows/preview.yml
index bfa9272..ab27adb 100644
--- a/.github/workflows/preview.yml
+++ b/.github/workflows/preview.yml
@@ -22,10 +22,10 @@ jobs:
         with:
           submodules: true
 
-      - name: Set up Node.js and Yarn
+      - name: Set up Node.js
         uses: actions/setup-node@v3
         with:
-          node-version: 20
+          node-version: '20.11.1'
 
       - name: Install and run eslint
         id: eslint-ci
@@ -56,6 +56,15 @@ jobs:
           set -e
           npm -g install @platformos/pos-cli
           pos-cli deploy
+          # Quote each secret so the shell passes it as a single argument (no word splitting or globbing).
+          pos-cli constants set --name modules/openai/OPENAI_REINDEX_TOKEN --value "${{ secrets.OPENAI_REINDEX_TOKEN }}"
+          pos-cli constants set --name modules/openai/OPENAI_SECRET_TOKEN --value "${{ secrets.OPENAI_SECRET_TOKEN }}"
+
+      - name: Search reindex
+        uses: fjogeleit/http-request-action@v1
+        with:
+          url: '${{ secrets.MPKIT_PREVIEW_URL }}_embeddings/reindex.json?token=${{ secrets.OPENAI_REINDEX_TOKEN }}'
+          method: 'GET'
 
       - name: Leave a comment after preview deployment
         uses: marocchino/sticky-pull-request-comment@v2
diff --git a/.gitignore b/.gitignore
index b6381ce..baf6d07 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,6 +57,7 @@ typings/
 # gatsby files
 .cache/
 src/public
+/public/
 
 # Mac files
 .DS_Store
@@ -74,3 +75,4 @@ app/assets/*
 tmp/
 .posify/
 reports/
+
diff --git a/.gitmodules b/.gitmodules
index 4294331..5dc128d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -16,3 +16,6 @@
 [submodule "theme-manager"]
 	path = modules/theme-manager
 	url = git@github.com:Platform-OS/pos-module-theme-manager.git
+[submodule "openai"]
+	path = modules/openai
+	url = git@github.com:Platform-OS/pos-module-openai.git
diff --git a/app/config.yml b/app/config.yml
index d8a70fa..bdedb40 100644
--- a/app/config.yml
+++ b/app/config.yml
@@ -11,3 +11,5 @@ modules_that_allow_delete_on_deploy:
 - permission
 - theme-manager
 - user
+- openai
+- openai-docs-kit
diff --git a/modules/openai b/modules/openai
new file mode 160000
index 0000000..b5d071e
--- /dev/null
+++ b/modules/openai
@@ -0,0 +1 @@
+Subproject commit b5d071e9691e49af414f4f89774019662b9a5a74
diff --git 
a/modules/openai-docs-kit/public/graphql/pages/search.graphql b/modules/openai-docs-kit/public/graphql/pages/search.graphql
new file mode 100644
index 0000000..6c38ea4
--- /dev/null
+++ b/modules/openai-docs-kit/public/graphql/pages/search.graphql
@@ -0,0 +1,19 @@
+# Lists admin pages (slug, content, html_content, metadata) whose slug is not in $not_slugs.
+query search_for_openai($not_slugs: [String!], $limit: Int = 2000) {
+  pages: admin_pages(
+    per_page: $limit
+    filter: {
+      slug: {
+        not_value_in: $not_slugs
+      }
+    }
+  ) {
+    results {
+      slug
+      content
+      html_content
+      metadata
+    }
+  }
+}
+
diff --git a/modules/openai-docs-kit/public/lib/commands/openai/pages_to_embeddings.liquid b/modules/openai-docs-kit/public/lib/commands/openai/pages_to_embeddings.liquid
new file mode 100644
index 0000000..aea7f47
--- /dev/null
+++ b/modules/openai-docs-kit/public/lib/commands/openai/pages_to_embeddings.liquid
@@ -0,0 +1,122 @@
+{% liquid
+
+  # Syncs page embeddings: creates new ones, updates changed ones, skips unchanged
+  # ones, deletes those whose page is gone, and returns a report of each outcome.
+  function all_pages = 'modules/openai-docs-kit/queries/pages/search_for_openai', limit: null
+  function object = 'modules/openai/queries/embeddings/search', limit: null, metadata: null, related_to: null
+  assign existing_embeddings = object.results
+
+  assign report = '{ "created": { "slugs": [], "count": 0 }, "updated": { "slugs": [], "count": 0 }, "skipped": { "slugs": [], "count": 0 }, "deleted": { "slugs": [], "count": 0 }, "errors": { "slugs": [], "count": 0 } }' | parse_json
+
+  assign pages = '[]' | parse_json
+
+  for page in all_pages
+    assign page_content_has_changed = true
+    hash_assign page['title'] = page.html_content | html_to_text: root_element: 'title'
+    hash_assign page['html_content'] = page.html_content | html_to_text: root_element: 'main#content'
+
+    if page.html_content == blank
+      continue
+    endif
+
+    hash_assign page['sha1'] = page['html_content'] | sha1
+
+    for existing_embedding in existing_embeddings
+      if existing_embedding.metadata.slug == page.slug and existing_embedding.metadata.sha1 == page.sha1
+        assign page_content_has_changed = false
+        hash_assign report['skipped']['slugs'] = report['skipped']['slugs'] | add_to_array: page.slug
+        hash_assign report['skipped']['count'] = report['skipped']['count'] | plus: 1
+        break
+      endif
+    endfor
+
+    if page_content_has_changed
+      assign pages = pages | array_add: page
+    endif
+  endfor
+
+  # Delete embeddings whose slug no longer appears among the fetched pages.
+  assign existing_slugs = all_pages | map: 'slug'
+  for existing_embedding in existing_embeddings
+    unless existing_slugs contains existing_embedding.metadata.slug
+      hash_assign report['deleted']['slugs'] = report['deleted']['slugs'] | add_to_array: existing_embedding.metadata.slug
+      hash_assign report['deleted']['count'] = report['deleted']['count'] | plus: 1
+      function object = 'modules/openai/commands/embeddings/delete', id: existing_embedding.id
+    endunless
+  endfor
+
+  # Request embeddings for the changed pages in groups of 50.
+  assign pages_chunks = pages | array_in_groups_of: 50
+
+  for page_chunk in pages_chunks
+    # Position of the current embedding within page_chunk; must start at 0 for every chunk.
+    assign i = 0
+    assign data = page_chunk | map: 'html_content'
+
+    function response = 'modules/openai/commands/openai/fetch_embeddings', object: data
+    if response.data.size > 0
+      assign embeddings = response.data
+
+      for emb in embeddings
+        assign pos_embedding_input = '{}' | parse_json
+        assign metadata = '{}' | parse_json
+
+        hash_assign metadata['slug'] = page_chunk[i].slug
+        hash_assign metadata['page'] = '{}' | parse_json
+        hash_assign metadata['page_metadata'] = page_chunk[i].metadata
+        if metadata['page_metadata'] == blank
+          hash_assign metadata['page_metadata'] = '{}' | parse_json
+        endif
+        hash_assign metadata['page_metadata']['title'] = page_chunk[i].title
+        hash_assign metadata['sha1'] = page_chunk[i].sha1
+
+        hash_assign pos_embedding_input['metadata'] = metadata
+        hash_assign pos_embedding_input['embedding'] = emb.embedding
+        hash_assign pos_embedding_input['content'] = page_chunk[i].html_content
+
+        assign found_embedding = null
+
+        for existing_embedding in existing_embeddings
+          if existing_embedding.metadata.slug == pos_embedding_input.metadata.slug
+            assign found_embedding = existing_embedding
+            break
+          endif
+        endfor
+
+        if found_embedding
+          hash_assign pos_embedding_input['id'] = found_embedding.id
+          function pos_embedding = 'modules/openai/commands/embeddings/update', object: pos_embedding_input, id: null
+
+
+          if pos_embedding.valid
+            hash_assign report['updated']['slugs'] = report['updated']['slugs'] | add_to_array: pos_embedding_input.metadata.slug
+            hash_assign report['updated']['count'] = report['updated']['count'] | plus: 1
+          else
+            assign err = "Failed to update Embedding#" | append: pos_embedding_input.id | append: ": " | append: pos_embedding.errors
+            hash_assign report['errors']['slugs'] = report['errors']['slugs'] | add_to_array: pos_embedding_input.metadata.slug
+            hash_assign report['errors']['count'] = report['errors']['count'] | plus: 1
+            log err, type: 'ERROR'
+          endif
+        else
+          function pos_embedding = 'modules/openai/commands/embeddings/create', object: pos_embedding_input
+          if pos_embedding.valid
+            hash_assign report['created']['slugs'] = report['created']['slugs'] | add_to_array: pos_embedding_input.metadata.slug
+            hash_assign report['created']['count'] = report['created']['count'] | plus: 1
+          else
+            assign err = "Failed to create Embedding: " | append: pos_embedding.errors
+            hash_assign report['errors']['slugs'] = report['errors']['slugs'] | add_to_array: pos_embedding_input.metadata.slug
+            hash_assign report['errors']['count'] = report['errors']['count'] | plus: 1
+            log err, type: 'ERROR'
+          endif
+        endif
+
+
+        assign i = i | plus: 1
+      endfor
+    endif
+  endfor
+
+  return report
+
+%}
+
diff --git a/modules/openai-docs-kit/public/lib/queries/pages/search_for_openai.liquid b/modules/openai-docs-kit/public/lib/queries/pages/search_for_openai.liquid
new file mode 100644
index 0000000..8e12ef4
--- /dev/null
+++ b/modules/openai-docs-kit/public/lib/queries/pages/search_for_openai.liquid
@@ -0,0 +1,28 @@
+{% liquid
+
+  # assign ignored_slugs = context.constants['modules/openai/OPENAI_PAGE_SLUGS_TO_IGNORE'] | default: '404,manifest.webmanifest,search,/' | split: ','
+  # assign ignored_prefixes = context.constants['modules/openai/OPENAI_PAGE_SLUG_PREFIXES_TO_IGNORE'] | default: '_,framework-,app-,~partytown,api/,user/,admin/,styleguide' | split: ','
+
+  assign ignored_slugs = '404,manifest.webmanifest,search,/' | split: ','
+  assign ignored_prefixes = '_,framework-,app-,~partytown,api,user,admin,styleguide,sessions,session,sitemap' | split: ','
+
+  # Exact slugs are excluded by the query itself; ignored prefixes are filtered out below.
+  graphql r = 'modules/openai-docs-kit/pages/search', not_slugs: ignored_slugs, limit: limit
+
+  assign all_pages = r.pages.results
+
+  if ignored_prefixes == empty
+    return r.pages.results
+  else
+    assign pages = '[]' | parse_json
+    for page in all_pages
+      assign contains_invalid_prefix = page.slug | start_with: ignored_prefixes
+      unless contains_invalid_prefix
+        assign pages = pages | add_to_array: page
+      endunless
+    endfor
+    return pages
+  endif
+
+%}
+
diff --git a/modules/openai-docs-kit/public/views/pages/_embeddings/reindex.json.liquid b/modules/openai-docs-kit/public/views/pages/_embeddings/reindex.json.liquid
new file mode 100644
index 0000000..1f5e679
--- /dev/null
+++ b/modules/openai-docs-kit/public/views/pages/_embeddings/reindex.json.liquid
@@ -0,0 +1,7 @@
+{%- if context.params.token == context.constants['modules/openai/OPENAI_REINDEX_TOKEN'] and context.params.token != blank -%}
+  {%- function res = 'modules/openai-docs-kit/commands/openai/pages_to_embeddings' -%}
+  {{- res -}}
+{%- else -%}
+  Unauthorized
+  {% response_status 401 %}
+{%- endif -%}
diff --git a/modules/openai-docs-kit/public/views/pages/search.json.liquid b/modules/openai-docs-kit/public/views/pages/search.json.liquid
new file mode 100644
index 0000000..f88e619
--- /dev/null
+++ b/modules/openai-docs-kit/public/views/pages/search.json.liquid
@@ -0,0 +1,54 @@
+---
+slug: api/search
+metadata:
+  title: OpenAI Search
+---
+{%- liquid
+  assign res = '{}' | parse_json
+  if context.constants['modules/openai/OPENAI_SECRET_TOKEN'] == blank
+    response_status 401
+    hash_assign res['error'] = "OpenAI is not configured in this environment."
+  else
+    function hcaptcha_already_solved = 'modules/core/commands/session/get', key: 'hcpatcha_solved', clear: false
+    if context.params.query != blank and context.params.query.size < 1000
+      assign c = '{ "errors": {}, "valid": true }' | parse_json
+      # function c = 'modules/core/lib/validations/hcaptcha', c: c, hcaptcha_params: context.params
+      if hcaptcha_already_solved == 'true' or c.valid
+        function _hcaptcha_already_solved = 'modules/core/commands/session/set', key: 'hcpatcha_solved', value: 'true'
+        assign user_input = '[]' | parse_json | add_to_array: context.params.query
+        function embeddings_response = 'modules/openai/commands/openai/fetch_embeddings', object: user_input
+
+        if embeddings_response.data.size > 0
+          assign embedding = embeddings_response.data.first.embedding
+
+          function related_embeddings = 'modules/openai/queries/embeddings/search', related_to: embedding, limit: 15, metadata: null
+          assign search_results = related_embeddings.results
+        else
+          hash_assign res['error'] = "Could not fetch embeddings: " | append: embeddings_response
+        endif
+      else
+        hash_assign res['error'] = "Validation error: " | append: c.errors
+      endif
+    endif
+
+    if search_results
+      assign results = '[]' | parse_json
+      for embedding in search_results
+        assign result = '{}' | parse_json
+        if embedding.metadata.slug == '/'
+          assign slug = embedding.metadata.slug
+        else
+          assign slug = '/' | append: embedding.metadata.slug
+        endif
+        hash_assign result['href'] = slug
+        hash_assign result['title'] = embedding.metadata.page_metadata.title | default: slug
+        hash_assign result['description'] = embedding.metadata.page_metadata.description
+        assign results = results | array_add: result
+      endfor
+      hash_assign res['results'] = results
+    endif
+
+  endif
+  print res
+-%}
+
diff --git a/src/gatsby-config.js b/src/gatsby-config.js
index 4ccd5aa..4538c86 100644
--- a/src/gatsby-config.js
+++ b/src/gatsby-config.js
@@ -25,42 +25,6 @@ module.exports = {
         icon: 
path.join(__dirname, 'src/images/favicon.png'), // This path is relative to the root of the site. }, }, - { - resolve: 'gatsby-plugin-local-search', - options: { - name: 'pages', - engine: 'flexsearch', - engineOptions: { - tokenize: 'forward' - }, - query: ` - { - allMdx { - nodes { - id - body - frontmatter { - title - description - slug - } - } - } - } - `, - ref: 'id', - index: ['title', 'body', 'description'], - store: ['id', 'slug', 'title', 'description'], - normalizer: ({ data }) => - data.allMdx.nodes.map((node) => ({ - id: node.id, - slug: node.frontmatter.slug, - title: node.frontmatter.title, - description: node.frontmatter.description, - body: node.body - })), - }, - }, { resolve: `gatsby-plugin-plausible`, options: { diff --git a/src/package-lock.json b/src/package-lock.json index 4a4aa3e..f284a84 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -15,13 +15,11 @@ "gatsby": "^5.10.0", "gatsby-cookie-notice": "^1.0.6", "gatsby-plugin-gdpr-cookies": "^2.0.9", - "gatsby-plugin-local-search": "^2.0.1", "gatsby-plugin-manifest": "^5.10.0", "gatsby-plugin-plausible": "^0.0.7", "query-string": "^8.1.0", "react": "^18.2.0", "react-dom": "^18.2.0", - "react-use-flexsearch": "^0.1.1", "sitemap": "^7.1.1" } }, @@ -11979,11 +11977,6 @@ "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.2.7.tgz", "integrity": "sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==" }, - "node_modules/flexsearch": { - "version": "0.6.32", - "resolved": "https://registry.npmjs.org/flexsearch/-/flexsearch-0.6.32.tgz", - "integrity": "sha512-EF1BWkhwoeLtbIlDbY/vDSLBen/E5l/f1Vg7iX5CDymQCamcx1vhlc3tIZxIDplPjgi0jhG37c67idFbjg+v+Q==" - }, "node_modules/follow-redirects": { "version": "1.15.2", "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", @@ -12902,23 +12895,6 @@ "node": ">= 10.0.0" } }, - "node_modules/gatsby-plugin-local-search": { - "version": "2.0.1", - "resolved": 
"https://registry.npmjs.org/gatsby-plugin-local-search/-/gatsby-plugin-local-search-2.0.1.tgz", - "integrity": "sha512-qrApdH2IYfHL+dSmcwSzhDPVxlkt13N0IfEkKxfWf0gITmBwObOJBYAMnYiYUmP0dpYmSV9anJE//SLZBSsisA==", - "dependencies": { - "flexsearch": "^0.6.32", - "lodash": "^4.17.19", - "lunr": "^2.3.8", - "pascal-case": "^3.1.1" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "gatsby": ">= 2.20.0" - } - }, "node_modules/gatsby-plugin-manifest": { "version": "5.11.0", "resolved": "https://registry.npmjs.org/gatsby-plugin-manifest/-/gatsby-plugin-manifest-5.11.0.tgz", @@ -18574,11 +18550,6 @@ "es5-ext": "~0.10.2" } }, - "node_modules/lunr": { - "version": "2.3.9", - "resolved": "https://registry.npmjs.org/lunr/-/lunr-2.3.9.tgz", - "integrity": "sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow==" - }, "node_modules/lz-string": { "version": "1.5.0", "resolved": "https://registry.npmjs.org/lz-string/-/lz-string-1.5.0.tgz", @@ -23431,17 +23402,6 @@ "node": ">=0.4.0" } }, - "node_modules/react-use-flexsearch": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/react-use-flexsearch/-/react-use-flexsearch-0.1.1.tgz", - "integrity": "sha512-UDRDB26HPcAo0gXNkUYYkcjoYCW4FSWr7Ich4adgVr7bqefJG7fnjlcqnwsKQkbZuteRLYzzox+1FKRTt3Z5vg==", - "dependencies": { - "flexsearch": "^0.6.22" - }, - "peerDependencies": { - "react": ">=16.8" - } - }, "node_modules/read": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/read/-/read-1.0.7.tgz", diff --git a/src/package.json b/src/package.json index 1dc2c3e..95d3f77 100644 --- a/src/package.json +++ b/src/package.json @@ -23,13 +23,11 @@ "gatsby": "^5.10.0", "gatsby-cookie-notice": "^1.0.6", "gatsby-plugin-gdpr-cookies": "^2.0.9", - "gatsby-plugin-local-search": "^2.0.1", "gatsby-plugin-manifest": "^5.10.0", "gatsby-plugin-plausible": "^0.0.7", "query-string": "^8.1.0", "react": "^18.2.0", "react-dom": "^18.2.0", - "react-use-flexsearch": "^0.1.1", "sitemap": 
"^7.1.1" } } diff --git a/src/src/@platformos/gatsby-theme-platformos-docskit/components/sidebar.js b/src/src/@platformos/gatsby-theme-platformos-docskit/components/sidebar.js index 2c57306..8b3a4d0 100644 --- a/src/src/@platformos/gatsby-theme-platformos-docskit/components/sidebar.js +++ b/src/src/@platformos/gatsby-theme-platformos-docskit/components/sidebar.js @@ -43,7 +43,7 @@ const Sidebar = ({ branch, treeData = [], isMobileNavOpen = false, sidebarEnable