From 578e33221a7fe76fa9867346666a24abf1b813de Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Fri, 5 Apr 2024 22:28:16 +0300 Subject: [PATCH 1/2] [DOC]: Cloudflare Workers AI EF --- .gitignore | 3 +- docs/embeddings.md | 23 ++++++++------- docs/embeddings/cloudflare.md | 55 +++++++++++++++++++++++++++++++++++ sidebars.js | 1 + 4 files changed, 70 insertions(+), 12 deletions(-) create mode 100644 docs/embeddings/cloudflare.md diff --git a/.gitignore b/.gitignore index 7af67c95..54107f3e 100644 --- a/.gitignore +++ b/.gitignore @@ -21,4 +21,5 @@ npm-debug.log* yarn-debug.log* yarn-error.log* -.tmp \ No newline at end of file +.tmp +.idea/ diff --git a/docs/embeddings.md b/docs/embeddings.md index 680efb58..981eb3d5 100644 --- a/docs/embeddings.md +++ b/docs/embeddings.md @@ -10,17 +10,18 @@ Chroma provides lightweight wrappers around popular embedding providers, making
-| | Python | JS | -|--------------|-----------|---------------| -| [OpenAI](/embeddings/openai) | ✅ | ✅ | -| [Google Generative AI](/embeddings/google-gemini) | ✅ | ✅ | -| [Cohere](/embeddings/cohere) | ✅ | ✅ | -| [Google PaLM](/embeddings/google-palm) | ✅ | ➖ | -| [Hugging Face](/embeddings/hugging-face) | ✅ | ➖ | -| [Instructor](/embeddings/instructor) | ✅ | ➖ | -| [Hugging Face Embedding Server](/embeddings/hugging-face-embedding-server) | ✅ | ✅ | -| [Jina AI](/embeddings/jinaai) | ✅ | ✅ | -| [Roboflow](/embeddings/roboflow-api) | ✅ | ➖ | +| | Python | JS | +|----------------------------------------------------------------------------|--------|----| +| [OpenAI](/embeddings/openai) | ✅ | ✅ | +| [Google Generative AI](/embeddings/google-gemini) | ✅ | ✅ | +| [Cohere](/embeddings/cohere) | ✅ | ✅ | +| [Google PaLM](/embeddings/google-palm) | ✅ | ➖ | +| [Hugging Face](/embeddings/hugging-face) | ✅ | ➖ | +| [Instructor](/embeddings/instructor) | ✅ | ➖ | +| [Hugging Face Embedding Server](/embeddings/hugging-face-embedding-server) | ✅ | ✅ | +| [Jina AI](/embeddings/jinaai) | ✅ | ✅ | +| [Roboflow](/embeddings/roboflow-api) | ✅ | ➖ | +| [Cloudflare Workers AI](/embeddings/cloudflare) | ✅ | ✅ | We welcome pull requests to add new Embedding Functions to the community. diff --git a/docs/embeddings/cloudflare.md b/docs/embeddings/cloudflare.md new file mode 100644 index 00000000..9347b2f9 --- /dev/null +++ b/docs/embeddings/cloudflare.md @@ -0,0 +1,55 @@ +--- +--- + +# Cloudflare Workers AI + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +
Select a language
+ + + + + + +Chroma provides a convenient wrapper around Cloudflare Workers AI REST API. This embedding function runs remotely on Cloudflare Workers AI. It requires an API key and an account ID or gateway endpoint. You can get an API key by signing up for an account at [Cloudflare Workers AI](https://cloudflare.com/). + +Visit the [Cloudflare Workers AI documentation](https://developers.cloudflare.com/workers-ai/) for more information on getting started. + + + + +```python +import chromadb.utils.embedding_functions as embedding_functions +cf_ef = embedding_functions.CloudflareWorkersAIEmbeddingFunction( + api_key = "YOUR_API_KEY", + account_id = "YOUR_ACCOUNT_ID", + model_name = "@cf/baai/bge-base-en-v1.5", + ) +cf_ef(input=["This is my first text to embed", "This is my second document"]) +``` + +You can pass in an optional `model_name` argument, which lets you choose which Cloudflare Workers AI [model](https://developers.cloudflare.com/workers-ai/models/#text-embeddings) to use. By default, Chroma uses `@cf/baai/bge-base-en-v1.5`. 
+ + + + ```javascript +// const {CloudflareWorkersAIEmbeddingFunction} = require('chromadb'); //CJS import +import {CloudflareWorkersAIEmbeddingFunction} from "chromadb"; //ESM import +const embedder = new CloudflareWorkersAIEmbeddingFunction({ + api_key: 'YOUR_API_KEY', + account_id: "YOUR_ACCOUNT_ID", + model_name: '@cf/baai/bge-base-en-v1.5', +}); + +// use directly +const embeddings = embedder.generate(['document1', 'document2']); + +// pass documents to query for .add and .query +const collection = await client.createCollection({name: "name", embeddingFunction: embedder}) +const collectionGet = await client.getCollection({name:"name", embeddingFunction: embedder}) +``` + + diff --git a/sidebars.js b/sidebars.js index 0f613ab5..457ddda1 100644 --- a/sidebars.js +++ b/sidebars.js @@ -95,6 +95,7 @@ const sidebars = { 'embeddings/roboflow-api', 'embeddings/hugging-face-embedding-server', 'embeddings/jinaai', + 'embeddings/cloudflare', ], }, ], From 60293cce9ed66f8be2fc2560bd34e473f18a3248 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Sat, 6 Apr 2024 19:42:59 +0300 Subject: [PATCH 2/2] docs: Updated js example to align with implementation. - Also added a note about the max batch size. Ref: chroma-core/chroma#1271 --- docs/embeddings/cloudflare.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/embeddings/cloudflare.md b/docs/embeddings/cloudflare.md index 9347b2f9..a96f6409 100644 --- a/docs/embeddings/cloudflare.md +++ b/docs/embeddings/cloudflare.md @@ -17,6 +17,10 @@ Chroma provides a convenient wrapper around Cloudflare Workers AI REST API. This Visit the [Cloudflare Workers AI documentation](https://developers.cloudflare.com/workers-ai/) for more information on getting started. +:::note +Currently, Cloudflare embeddings endpoints allow batches of at most 100 documents in a single request. The embedding function (EF) enforces a hard limit of 100 documents per request and will raise an error if you try to pass more than 100 documents. 
+::: + @@ -24,7 +28,7 @@ Visit the [Cloudflare Workers AI documentation](https://developers.cloudflare.co import chromadb.utils.embedding_functions as embedding_functions cf_ef = embedding_functions.CloudflareWorkersAIEmbeddingFunction( api_key = "YOUR_API_KEY", - account_id = "YOUR_ACCOUNT_ID", + account_id = "YOUR_ACCOUNT_ID", # or gateway_endpoint model_name = "@cf/baai/bge-base-en-v1.5", ) cf_ef(input=["This is my first text to embed", "This is my second document"]) @@ -39,9 +43,9 @@ You can pass in an optional `model_name` argument, which lets you choose which C // const {CloudflareWorkersAIEmbeddingFunction} = require('chromadb'); //CJS import import {CloudflareWorkersAIEmbeddingFunction} from "chromadb"; //ESM import const embedder = new CloudflareWorkersAIEmbeddingFunction({ - api_key: 'YOUR_API_KEY', - account_id: "YOUR_ACCOUNT_ID", - model_name: '@cf/baai/bge-base-en-v1.5', + apiToken: 'YOUR_API_KEY', + accountId: "YOUR_ACCOUNT_ID", // or gatewayEndpoint + model: '@cf/baai/bge-base-en-v1.5', }); // use directly