From 9d45d7521b09f21bda8c5ea902164ff5ebda7ced Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Tue, 14 Nov 2023 08:36:33 -0800 Subject: [PATCH 1/2] proposed format for standardized EF pages --- docs/embeddings/openai.md | 99 +++++++++++++++++++++++++-------------- src/css/custom.css | 51 +++++++++++++++++--- 2 files changed, 108 insertions(+), 42 deletions(-) diff --git a/docs/embeddings/openai.md b/docs/embeddings/openai.md index 8510205..986bb63 100644 --- a/docs/embeddings/openai.md +++ b/docs/embeddings/openai.md @@ -3,59 +3,86 @@ # OpenAI -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; +Chroma provides a convenient wrapper around OpenAI's embedding API. This embedding function runs remotely on OpenAI's servers, and requires an API key. You can get an API key by signing up for an account at [OpenAI](https://openai.com/api/). -
Select a language
+<div class="data_table"></div>
- - - - +| Models | Input | Dimensionality | Context Size| Default | +|--|--|--|--|--| +|`ada-002` | English | 1536 | 2048 | ✅ | -Chroma provides a convenient wrapper around OpenAI's embedding API. This embedding function runs remotely on OpenAI's servers, and requires an API key. You can get an API key by signing up for an account at [OpenAI](https://openai.com/api/). +## Basic Usage - - +### Python -This embedding function relies on the `openai` python package, which you can install with `pip install openai`. +```bash +pip install openai +``` ```python -openai_ef = embedding_functions.OpenAIEmbeddingFunction( - api_key="YOUR_API_KEY", - model_name="text-embedding-ada-002" - ) + +from chromadb.utils import embedding_functions + +embedder = embedding_functions.OpenAIEmbeddingFunction( + api_key="YOUR_API_KEY") + +collection = client.create_collection( + name="oai_ef", + embedding_function=embedder) ``` -To use the OpenAI embedding models on other platforms such as Azure, you can use the `api_base` and `api_type` parameters: +### Javascript + +```bash +yarn add openai +``` + +```javascript +import { ChromaClient, OpenAIEmbeddingFunction } from 'chromadb' + +const embedder = new OpenAIEmbeddingFunction({ + openai_api_key: "YOUR_API_KEY" +}) + +const collection = await client.createCollection({ + name: "oai_ef", + embeddingFunction: embedder +}) +``` + +## Advanced Usage + +### Call directly + +By passing the embedding function to a Collection, Chroma handles the embedding of documents and queries for you. However in some cases you may want to generate the embeddings outside and handle them yourself. 
+ +#### Python + ```python -openai_ef = embedding_functions.OpenAIEmbeddingFunction( - api_key="YOUR_API_KEY", - api_base="YOUR_API_BASE_PATH", - api_type="azure", - api_version="YOUR_API_VERSION", - model_name="text-embedding-ada-002" - ) +embeddings = embedder(["document1","document2"]) +# [[0.04565250128507614, 0.01611952856183052...], [0.030171213671565056, 0.007690359838306904...]] ``` - - +#### Javascript ```javascript -const {OpenAIEmbeddingFunction} = require('chromadb'); -const embedder = new OpenAIEmbeddingFunction({openai_api_key: "apiKey"}) - -// use directly const embeddings = embedder.generate(["document1","document2"]) - -// pass documents to query for .add and .query -const collection = await client.createCollection({name: "name", embeddingFunction: embedder}) -const collection = await client.getCollection({name: "name", embeddingFunction: embedder}) +// [[0.04565250128507614, 0.01611952856183052...], [0.030171213671565056, 0.007690359838306904...]] ``` - +### Using a different model - +You can pass in an optional `model_name` argument, which lets you choose which OpenAI embeddings model to use. By default, Chroma uses `text-embedding-ada-002`. You can see a list of all available models [here](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings). +### Run with Azure -You can pass in an optional `model_name` argument, which lets you choose which OpenAI embeddings model to use. By default, Chroma uses `text-embedding-ada-002`. You can see a list of all available models [here](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings). 
\ No newline at end of file +To use the OpenAI embedding models on other platforms such as Azure, you can use the `api_base` and `api_type` parameters: +```python +openai_ef = embedding_functions.OpenAIEmbeddingFunction( + api_key="YOUR_API_KEY", + api_base="YOUR_API_BASE_PATH", + api_type="azure", + api_version="YOUR_API_VERSION", + model_name="text-embedding-ada-002" + ) +``` \ No newline at end of file diff --git a/src/css/custom.css b/src/css/custom.css index b512705..d65c93b 100644 --- a/src/css/custom.css +++ b/src/css/custom.css @@ -406,7 +406,10 @@ article p a { font-size: 1.5rem !important; } h3 { - font-size: 1rem !important; + font-size: 1.2rem !important; + } + h4 { + font-size: 0.9rem !important; } article { max-width: 700px; @@ -483,12 +486,8 @@ article p a { div.special_table + table { border: none; - - /* border-collapse: separate; */ - /* border-spacing: 0px; */ } - div.special_table + table thead { background: rgba(120,120,120, 0.1); border-top-right-radius: 10px; @@ -519,6 +518,8 @@ div.special_table + table, th, td { border-width: 0px !important; } + + .custom-tag { display: inline; background-color: #f0f0f0; @@ -588,4 +589,42 @@ div.special_table + table, th, td { .main-wrapper { min-height: 100vh; -} \ No newline at end of file +} + + +div.data_table + table { + border: none; + padding-top: 20px; + padding-bottom: 20px; +} + +div.data_table + table thead { + background: rgba(120,120,120, 0.1); + border-top-right-radius: 10px; + overflow: hidden; +} + +div.data_table + table thead tr { + background: rgba(255, 255, 255, 0.1); + border-top: 0px; + border-bottom: 0px; + text-align: left; +} +div.data_table + table tr th { + background: rgba(255, 255, 255, 0); + color: #000; + font-weight: 600; + padding: 5px 20px; +} +div.data_table + table tr td { + padding: 5px 20px; + text-align: left; +} + +div.data_table + table tr:nth-child(even) { + background: rgba(255, 255, 255, 0); +} + +div.data_table + table, th, td { + border-width: 0px !important; +} 
From fa3b0fcfa6044def0710fe25e4d102bddfb209a0 Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Tue, 19 Dec 2023 10:39:23 -0800 Subject: [PATCH 2/2] add format to cohere as well --- docs/embeddings/cohere.md | 96 ++++++++++++++++++++++++--------------- docs/embeddings/openai.md | 4 +- src/css/custom.css | 1 + 3 files changed, 63 insertions(+), 38 deletions(-) diff --git a/docs/embeddings/cohere.md b/docs/embeddings/cohere.md index 8831c2c..26805cf 100644 --- a/docs/embeddings/cohere.md +++ b/docs/embeddings/cohere.md @@ -3,56 +3,88 @@ # Cohere -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; +Chroma also provides a convenient wrapper around Cohere's embedding API. This embedding function runs remotely on Cohere’s servers, and requires an API key. You can get an API key by signing up for an account at [Cohere](https://dashboard.cohere.ai/welcome/register). -
Select a language
+<div class="data_table"></div>
- - - - +| Models | Input | Dimensionality | Context Size| +|--|--|--|--| +|`embed-english-v3.0` | English | 1024 | 512 (recommended) | +|`embed-multilingual-v3.0` | [Full List](https://docs.cohere.com/docs/supported-languages) | 1024 | 512 (recommended) | +|`embed-english-light-v3.0` | English | 384 | 512 (recommended) | +|`embed-multilingual-light-v3.0` | [Full List](https://docs.cohere.com/docs/supported-languages) | 384 | 512 (recommended) | +|`embed-english-v2.0` | English | 4096 | 512 (recommended) | +|`embed-english-light-v2.0` | English | 1024 | 512 (recommended) | +|`embed-multilingual-v2.0` | [Full List](https://docs.cohere.com/docs/supported-languages) | 768 | 512 (recommended) | -Chroma also provides a convenient wrapper around Cohere's embedding API. This embedding function runs remotely on Cohere’s servers, and requires an API key. You can get an API key by signing up for an account at [Cohere](https://dashboard.cohere.ai/welcome/register). - - +## Basic Usage -This embedding function relies on the `cohere` python package, which you can install with `pip install cohere`. 
+### Python + +```bash +pip install cohere +``` ```python -cohere_ef = embedding_functions.CohereEmbeddingFunction(api_key="YOUR_API_KEY", model_name="large") -cohere_ef(texts=["document1","document2"]) + +from chromadb.utils import embedding_functions + +embedder = embedding_functions.CohereEmbeddingFunction( + api_key="YOUR_API_KEY") + +collection = client.create_collection( + name="cohere_ef", + embedding_function=embedder) ``` - - +### Javascript + +```bash +yarn add cohere-ai +``` ```javascript -const {CohereEmbeddingFunction} = require('chromadb'); -const embedder = new CohereEmbeddingFunction("apiKey") +import { ChromaClient, CohereEmbeddingFunction } from 'chromadb' -// use directly -const embeddings = embedder.generate(["document1","document2"]) +const embedder = new CohereEmbeddingFunction({ + apiKey: "YOUR_API_KEY" +}) -// pass documents to query for .add and .query -const collection = await client.createCollection({name: "name", embeddingFunction: embedder}) -const collectionGet = await client.getCollection({name:"name", embeddingFunction: embedder}) +const collection = await client.createCollection({ + name: "cohere_ef", + embeddingFunction: embedder +}) ``` - +## Advanced Usage - +### Call directly +By passing the embedding function to a Collection, Chroma handles the embedding of documents and queries for you. However, in some cases you may want to generate the embeddings outside and handle them yourself. +#### Python + +```python +embeddings = embedder(["document1","document2"]) +# [[0.04565250128507614, 0.01611952856183052...], [0.030171213671565056, 0.007690359838306904...]] +``` + +#### Javascript + +```javascript +const embeddings = await embedder.generate(["document1","document2"]) +// [[0.04565250128507614, 0.01611952856183052...], [0.030171213671565056, 0.007690359838306904...]] +``` + +### Using a different model You can pass in an optional `model_name` argument, which lets you choose which Cohere embeddings model to use. 
By default, Chroma uses `large` model. You can see the available models under `Get embeddings` section [here](https://docs.cohere.ai/reference/embed). ### Multilingual model example - - +#### Python ```python cohere_ef = embedding_functions.CohereEmbeddingFunction( @@ -69,11 +101,10 @@ cohere_ef(texts=multilingual_texts) ``` - - +#### Javascript ```javascript -const {CohereEmbeddingFunction} = require('chromadb'); +import { CohereEmbeddingFunction } from 'chromadb' const embedder = new CohereEmbeddingFunction("apiKey") multilingual_texts = [ 'Hello from Cohere!', 'مرحبًا من كوهير!', @@ -86,11 +117,4 @@ const embeddings = embedder.generate(multilingual_texts) ``` - - - - - - - For more information on multilingual model you can read [here](https://docs.cohere.ai/docs/multilingual-language-models). \ No newline at end of file diff --git a/docs/embeddings/openai.md b/docs/embeddings/openai.md index 986bb63..a0aba31 100644 --- a/docs/embeddings/openai.md +++ b/docs/embeddings/openai.md @@ -7,9 +7,9 @@ Chroma provides a convenient wrapper around OpenAI's embedding API. This embeddi
-| Models | Input | Dimensionality | Context Size| Default | +| Models | Input | Dimensionality | Context Size| |--|--|--|--|--| -|`ada-002` | English | 1536 | 2048 | ✅ | +|`ada-002` | English | 1536 | 2048 | ## Basic Usage diff --git a/src/css/custom.css b/src/css/custom.css index d65c93b..1efcb3e 100644 --- a/src/css/custom.css +++ b/src/css/custom.css @@ -596,6 +596,7 @@ div.data_table + table { border: none; padding-top: 20px; padding-bottom: 20px; + zoom: 0.8; } div.data_table + table thead {