From 232aaabd320202d1cf4fa7f1f1cc4e7bc8c30f3c Mon Sep 17 00:00:00 2001 From: Basil Sunny Date: Sun, 25 Feb 2024 09:06:52 +0530 Subject: [PATCH 1/5] docs: add universal sentence encoder embedding function --- docs/embeddings.md | 1 + docs/embeddings/universal-sentence-encoder.md | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 docs/embeddings/universal-sentence-encoder.md diff --git a/docs/embeddings.md b/docs/embeddings.md index 0d2f37b..d7261d8 100644 --- a/docs/embeddings.md +++ b/docs/embeddings.md @@ -20,6 +20,7 @@ Chroma provides lightweight wrappers around popular embedding providers, making | [Instructor](/embeddings/instructor) | ✅ | ➖ | | [Hugging Face Embedding Server](/embeddings/hugging-face-embedding-server) | ✅ | ✅ | | [Jina AI](/embeddings/jinaai) | ✅ | ✅ | +| [Universal Sentence Encoder](/embeddings/universal-sentence-encoder) | ✅ | ➖ | We welcome pull requests to add new Embedding Functions to the community. diff --git a/docs/embeddings/universal-sentence-encoder.md b/docs/embeddings/universal-sentence-encoder.md new file mode 100644 index 0000000..17fe574 --- /dev/null +++ b/docs/embeddings/universal-sentence-encoder.md @@ -0,0 +1,34 @@ +--- +--- + +# Universal Sentence Encoder + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +
Select a language
+ + + + + + + + + +Chroma also provides a convenient wrapper around [Universal Sentence Encoder](https://research.google.com/pubs/archive/46808.pdf). This embedding function uses `https://tfhub.dev/google/universal-sentence-encoder/4` available on Tensorflow Hub. + +This embedding function relies on the `tensorflow_hub` python package, which you can install with `pip install tensorflow_hub`. + +```python +import chromadb.utils.embedding_functions as embedding_functions +huggingface_ef = embedding_functions.UniversalSentenceEncoderEmbeddingFunction() +``` + +You can pass in an optional `model_name` argument, which lets you choose which model to use. By default, Chroma uses `[https://tfhub.dev/google/universal-sentence-encoder/4](https://tfhub.dev/google/universal-sentence-encoder/4)`. + + + + Support for [Universal Sentence Encoder](https://research.google.com/pubs/archive/46808.pdf) embedding function is not implemented yet. Feel free to contribute by following the doc: [Custom Embedding Functions](https://docs.trychroma.com/embeddings?lang=js) + + From e11f064bb6291e2bd653e5a47efbd0ff8308fccf Mon Sep 17 00:00:00 2001 From: Basil Sunny Date: Sun, 25 Feb 2024 09:29:02 +0530 Subject: [PATCH 2/5] fix: badly formatted link --- docs/embeddings/universal-sentence-encoder.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/embeddings/universal-sentence-encoder.md b/docs/embeddings/universal-sentence-encoder.md index 17fe574..6e2d0f0 100644 --- a/docs/embeddings/universal-sentence-encoder.md +++ b/docs/embeddings/universal-sentence-encoder.md @@ -16,7 +16,9 @@ import TabItem from '@theme/TabItem'; -Chroma also provides a convenient wrapper around [Universal Sentence Encoder](https://research.google.com/pubs/archive/46808.pdf). This embedding function uses `https://tfhub.dev/google/universal-sentence-encoder/4` available on Tensorflow Hub.
+Chroma also provides a convenient wrapper around [Universal Sentence Encoder](https://research.google.com/pubs/archive/46808.pdf). + +This embedding function uses `https://tfhub.dev/google/universal-sentence-encoder/4` available on Tensorflow Hub This embedding function relies on the `tensorflow_hub` python package, which you can install with `pip install tensorflow_hub`. @@ -25,7 +27,7 @@ import chromadb.utils.embedding_functions as embedding_functions huggingface_ef = embedding_functions.UniversalSentenceEncoderEmbeddingFunction() ``` -You can pass in an optional `model_name` argument, which lets you choose which model to use. By default, Chroma uses `[https://tfhub.dev/google/universal-sentence-encoder/4](https://tfhub.dev/google/universal-sentence-encoder/4)`. +You can pass in an optional `model_name` argument, which lets you choose which model to use. By default, Chroma uses [https://tfhub.dev/google/universal-sentence-encoder/4](https://tfhub.dev/google/universal-sentence-encoder/4).
From 28fe77cddcf7f3a87c1650105402544d53a92fcf Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 11 Apr 2024 19:51:21 +0300 Subject: [PATCH 3/5] Update sidebars.js --- sidebars.js | 1 + 1 file changed, 1 insertion(+) diff --git a/sidebars.js b/sidebars.js index 0f613ab..00bdb93 100644 --- a/sidebars.js +++ b/sidebars.js @@ -95,6 +95,7 @@ const sidebars = { 'embeddings/roboflow-api', 'embeddings/hugging-face-embedding-server', 'embeddings/jinaai', + 'embeddings/universal-sentence-encoder', ], }, ], From 03135094a0b3c773d2783f4b8ce56c88f3b3a5b5 Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Thu, 11 Apr 2024 20:46:36 +0300 Subject: [PATCH 4/5] fix: Addressed comments from Jeff + fixed a sidebar issue --- docs/embeddings/universal-sentence-encoder.md | 13 +++++++++++-- sidebars.js | 1 - 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/embeddings/universal-sentence-encoder.md b/docs/embeddings/universal-sentence-encoder.md index 6e2d0f0..04f0ba6 100644 --- a/docs/embeddings/universal-sentence-encoder.md +++ b/docs/embeddings/universal-sentence-encoder.md @@ -16,21 +16,30 @@ import TabItem from '@theme/TabItem'; + Chroma also provides a convenient wrapper around [Universal Sentence Encoder](https://research.google.com/pubs/archive/46808.pdf). -This embedding function uses `https://tfhub.dev/google/universal-sentence-encoder/4` available on Tensorflow Hub +This embedding function uses models hosted on [Tensorflow Hub](https://tfhub.dev/). This embedding function relies on the `tensorflow_hub` python package, which you can install with `pip install tensorflow_hub`.
```python import chromadb.utils.embedding_functions as embedding_functions huggingface_ef = embedding_functions.UniversalSentenceEncoderEmbeddingFunction() + +huggingface_ef([ + "The quick brown fox jumps over the lazy dog.", + "I am a sentence for which I would like to get its embedding"]) + ``` -You can pass in an optional `model_name` argument, which lets you choose which model to use. By default, Chroma uses [https://tfhub.dev/google/universal-sentence-encoder/4](https://tfhub.dev/google/universal-sentence-encoder/4). + +You can pass in an optional `model_name` argument, which lets you choose which model to use. By default, Chroma uses [Universal Sentence Encoder](https://tfhub.dev/google/universal-sentence-encoder/4) - `https://tfhub.dev/google/universal-sentence-encoder/4` + Support for [Universal Sentence Encoder](https://research.google.com/pubs/archive/46808.pdf) embedding function is not implemented yet. Feel free to contribute by following the doc: [Custom Embedding Functions](https://docs.trychroma.com/embeddings?lang=js) + diff --git a/sidebars.js b/sidebars.js index 00bdb93..576baa0 100644 --- a/sidebars.js +++ b/sidebars.js @@ -93,7 +93,6 @@ const sidebars = { 'embeddings/hugging-face-embedding-server', 'embeddings/instructor', 'embeddings/roboflow-api', - 'embeddings/hugging-face-embedding-server', 'embeddings/jinaai', 'embeddings/universal-sentence-encoder', ], From 9f258353404bf3b312a00b4a17c33e0626ccd69e Mon Sep 17 00:00:00 2001 From: Basil Sunny Date: Wed, 17 Apr 2024 14:07:12 +0530 Subject: [PATCH 5/5] fix: broken markdown links --- docs/embeddings/universal-sentence-encoder.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/embeddings/universal-sentence-encoder.md b/docs/embeddings/universal-sentence-encoder.md index 04f0ba6..f442e3a 100644 --- a/docs/embeddings/universal-sentence-encoder.md +++ b/docs/embeddings/universal-sentence-encoder.md @@ -17,7 +17,7 @@ import TabItem from '@theme/TabItem'; -Chroma also 
provides a convenient wrapper around [Universal Sentence Encoder](https://research.google.com/pubs/archive/46808.pdf). +Chroma also provides a convenient wrapper around [Universal Sentence Encoder](https://research.google.com/pubs/archive/46808.pdf) This embedding function uses models hosted on [Tensorflow Hub](https://tfhub.dev/). @@ -34,8 +34,7 @@ huggingface_ef([ ``` -You can pass in an optional `model_name` argument, which lets you choose which model to use. By default, Chroma uses [Universal Sentence Encoder](https://tfhub.dev/google/universal-sentence-encoder/4) - `https://tfhub.dev/google/universal-sentence-encoder/4` - +You can pass in an optional `model_name` argument, which lets you choose which model to use. By default, Chroma uses [Universal Sentence Encoder 4](https://tfhub.dev/google/universal-sentence-encoder/4) provided by Tensorflow Hub.