From d717a36d4601258eb1024e46ee40297915da5208 Mon Sep 17 00:00:00 2001
From: Samuel Allan <samuel@opencraft.com>
Date: Tue, 26 Aug 2025 16:56:18 +0930
Subject: [PATCH] feat: add Typesense backend for search

Support searching forum threads and comments using Typesense as
a backend.

This is completely backwards compatible; Meilisearch can
still be used as the backend with no changes, and existing installations
will continue to be configured to use Meilisearch.

The new backend can be enabled by setting:

```py
TYPESENSE_ENABLED = True
```

You can also manually set the backend for testing with something like:

```py
FORUM_SEARCH_BACKEND = "forum.search.typesense.TypesenseBackend"
```

Configuring the Typesense backend can be done with these settings:

```py
TYPESENSE_API_KEY = "your-secret-api-key"
TYPESENSE_URLS = ["https://typesense-1.example.com:8108", "https://typesense-2.example.com:8108"]
TYPESENSE_COLLECTION_PREFIX = "my_prefix_"  # prepended to the "forum" collection name
```

The actual implementation differs from the Meilisearch backend
in some areas, mostly for internal efficiency.
For example, a single index is used to cover both threads and comments,
while Meilisearch uses two.
Nevertheless, it aims to return comparable search results to end users.

Documentation for this is being added to the Open edX docs
in https://github.com/openedx/docs.openedx.org/pull/1376,
which will be the source of truth.

Private-ref: https://tasks.opencraft.com/browse/BB-9975
---
 CHANGELOG.rst             |   2 +-
 forum/search/typesense.py | 441 ++++++++++++++++++++++++++++++++++++++
 forum/settings/common.py  |   8 +-
 forum/settings/test.py    |   4 +
 requirements/base.in      |   1 +
 requirements/base.txt     |   3 +
 requirements/ci.txt       |   3 +
 requirements/dev.txt      |   5 +
 requirements/doc.txt      |   3 +
 requirements/quality.txt  |   3 +
 requirements/test.txt     |   3 +
 tests/test_settings.py    |  18 ++
 tests/test_typesense.py   | 249 +++++++++++++++++++++
 13 files changed, 741 insertions(+), 2 deletions(-)
 create mode 100644 forum/search/typesense.py
 create mode 100644 tests/test_settings.py
 create mode 100644 tests/test_typesense.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 8e446d37..bd7b0897 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -14,7 +14,7 @@ Change Log
 Unreleased
 **********
 
-*
+* Add support for Typesense as the search backend.
 
 0.3.4 – 2025-08-13
 ******************
diff --git a/forum/search/typesense.py b/forum/search/typesense.py
new file mode 100644
index 00000000..d191c7b1
--- /dev/null
+++ b/forum/search/typesense.py
@@ -0,0 +1,441 @@
+"""
+Typesense backend for searching comments and threads.
+"""
+
+from typing import Any, Optional, cast
+
+from bs4 import BeautifulSoup
+from django.conf import settings
+from django.core.paginator import Paginator
+
+from typesense.client import Client
+from typesense.types.collection import CollectionCreateSchema
+from typesense.types.document import DocumentSchema, SearchParameters
+from typesense.exceptions import ObjectNotFound
+
+from forum.backends.mysql.models import Comment, CommentThread
+from forum.constants import FORUM_MAX_DEEP_SEARCH_COMMENT_COUNT
+from forum.search.base import (
+    BaseDocumentSearchBackend,
+    BaseIndexSearchBackend,
+    BaseSearchBackend,
+    BaseThreadSearchBackend,
+)
+
+_TYPESENSE_CLIENT: Client | None = None
+
+
+def get_typesense_client() -> Client:
+    """
+    Return a singleton Typesense client instance.
+    """
+    global _TYPESENSE_CLIENT
+    if _TYPESENSE_CLIENT is None:
+        _TYPESENSE_CLIENT = Client(
+            {
+                "api_key": settings.TYPESENSE_API_KEY,
+                "nodes": settings.TYPESENSE_URLS,
+            }
+        )
+    return _TYPESENSE_CLIENT
+
+
+def quote_filter_value(value: str) -> str:
+    """
+    Sanitize and safely quote a value for use in a Typesense filter.
+
+    https://typesense.org/docs/guide/tips-for-filtering.html#escaping-special-characters
+    """
+    return "`" + value.replace("`", "") + "`"
+
+
+def collection_name() -> str:
+    """
+    Generate the collection name to use in Typesense.
+    """
+    return settings.TYPESENSE_COLLECTION_PREFIX + "forum"
+
+
+def collection_schema() -> CollectionCreateSchema:
+    """
+    The schema to use for creating the collection.
+    """
+    return {
+        "name": collection_name(),
+        # NOTE: there's always an implicit "id" field
+        "fields": [
+            {"name": "thread_id", "type": "string"},
+            {"name": "course_id", "type": "string"},
+            {"name": "commentable_id", "type": "string"},
+            {"name": "context", "type": "string"},
+            {"name": "text", "type": "string"},
+        ],
+    }
+
+
+def expected_full_collection_schema() -> dict[str, Any]:
+    """
+    What is expected to be the full collection schema.
+
+    Use this to validate the actual schema from the server.
+    Note that Typesense may add new keys to the schema;
+    this is ok, and validation should still pass.
+    """
+    field_defaults = {
+        "facet": False,
+        "index": True,
+        "infix": False,
+        "locale": "",
+        "optional": False,
+        "sort": False,
+        "stem": False,
+        "stem_dictionary": "",
+        "store": True,
+        "type": "string",
+    }
+    return {
+        "default_sorting_field": "",
+        "enable_nested_fields": False,
+        "fields": [
+            {
+                **field_defaults,
+                "name": "thread_id",
+            },
+            {
+                **field_defaults,
+                "name": "course_id",
+            },
+            {
+                **field_defaults,
+                "name": "commentable_id",
+            },
+            {
+                **field_defaults,
+                "name": "context",
+            },
+            {
+                **field_defaults,
+                "name": "text",
+            },
+        ],
+        "name": collection_name(),
+        "symbols_to_index": [],
+        "token_separators": [],
+    }
+
+
+def document_from_thread(doc_id: str | int, data: dict[str, Any]) -> DocumentSchema:
+    """
+    Build a Typesense document from a thread's data.
+    """
+    return {
+        "id": f"thread-{doc_id}",
+        "thread_id": str(doc_id),
+        "course_id": str(data.get("course_id", "")),
+        "commentable_id": str(data.get("commentable_id", "")),
+        "context": str(data.get("context", "")),
+        "text": "{}\n{}".format(
+            str(data.get("title", "")),
+            (
+                BeautifulSoup(data["body"], features="html.parser").get_text()
+                if data.get("body")
+                else ""
+            ),
+        ),
+    }
+
+
+def document_from_comment(doc_id: str | int, data: dict[str, Any]) -> DocumentSchema:
+    """
+    Build a Typesense document from a comment's data.
+    """
+    # NOTE: Comments have no commentable_id or title, and the context is hardcoded to "course".
+    return {
+        "id": f"comment-{doc_id}",
+        "thread_id": str(data.get("comment_thread_id", "")),
+        "course_id": str(data.get("course_id", "")),
+        "commentable_id": "",
+        "context": str(data.get("context", "")),
+        "text": (
+            BeautifulSoup(data["body"], features="html.parser").get_text()
+            if data.get("body")
+            else ""
+        ),
+    }
+
+
+def build_search_parameters(
+    *,
+    search_text: str,
+    course_id: str | None,
+    context: str,
+    commentable_ids: list[str] | None,
+) -> SearchParameters:
+    """
+    Build Typesense search parameters for searching the index.
+    """
+    # `context` is always a single word,
+    # so we can gain performance without losing accuracy by using the faster `:` (non-exact) operator.
+    # See https://typesense.org/docs/29.0/api/search.html#filter-parameters for more information.
+    filters = [f"context:{quote_filter_value(context)}"]
+    # The schema field is singular (`commentable_id`); the `[a, b]` form matches any of the listed values.
+    if commentable_ids:
+        safe_ids = ", ".join(quote_filter_value(value) for value in commentable_ids)
+        filters.append(f"commentable_id:[{safe_ids}]")
+
+    if course_id:
+        filters.append(f"course_id:={quote_filter_value(course_id)}")
+
+    return {
+        "q": search_text,
+        "query_by": "text",
+        "filter_by": " && ".join(filters),
+        "per_page": FORUM_MAX_DEEP_SEARCH_COMMENT_COUNT,
+    }
+
+
+class TypesenseDocumentBackend(BaseDocumentSearchBackend):
+    """
+    Document backend implementation for Typesense.
+    """
+
+    def index_document(
+        self, index_name: str, doc_id: str | int, document: dict[str, Any]
+    ) -> None:
+        """
+        Index a document in Typesense.
+        """
+        client = get_typesense_client()
+
+        if index_name == "comments":
+            typesense_document = document_from_comment(doc_id, document)
+        elif index_name == "comment_threads":
+            typesense_document = document_from_thread(doc_id, document)
+        else:
+            raise NotImplementedError(f"unknown index name: {index_name}")
+
+        client.collections[collection_name()].documents.upsert(typesense_document)
+
+    def update_document(
+        self, index_name: str, doc_id: str | int, update_data: dict[str, Any]
+    ) -> None:
+        """
+        Same operation as index_document, because upsert is used.
+        """
+        return self.index_document(index_name, doc_id, update_data)
+
+    def delete_document(self, index_name: str, doc_id: str | int) -> None:
+        """
+        Delete a document from Typesense.
+        """
+        client = get_typesense_client()
+        if index_name == "comments":
+            typesense_doc_id = f"comment-{doc_id}"
+        elif index_name == "comment_threads":
+            typesense_doc_id = f"thread-{doc_id}"
+        else:
+            raise NotImplementedError(f"unknown index name: {index_name}")
+
+        client.collections[collection_name()].documents[typesense_doc_id].delete(
+            delete_parameters={"ignore_not_found": True},
+        )
+
+
+class TypesenseIndexBackend(BaseIndexSearchBackend):
+    """
+    Manage indexes for the Typesense backend.
+
+    Typesense calls these "collections". https://typesense.org/docs/29.0/api/collections.html
+    """
+
+    def initialize_indices(self, force_new_index: bool = False) -> None:
+        """
+        Initialize the indices in Typesense.
+
+        If force_new_index is True, the indexes will be dropped before being recreated.
+        """
+        client = get_typesense_client()
+        name = collection_name()
+        exists: bool = True
+        try:
+            client.collections[name].retrieve()
+        except ObjectNotFound:
+            exists = False
+
+        if force_new_index and exists:
+            client.collections[name].delete()
+
+        if force_new_index or not exists:
+            client.collections.create(collection_schema())
+
+    def rebuild_indices(
+        self, batch_size: int = 500, extra_catchup_minutes: int = 5
+    ) -> None:
+        """
+        Reindex everything in Typesense
+
+        The Typesense collections are dropped and recreated,
+        and data is reindexed from the MySQL database.
+
+        Only MySQL-backed instances are supported.
+        Note that the `extra_catchup_minutes` argument is ignored.
+        """
+        client = get_typesense_client()
+        self.initialize_indices(force_new_index=True)
+
+        for model, document_builder in [
+            (CommentThread, document_from_thread),
+            (Comment, document_from_comment),
+        ]:
+            paginator = Paginator(
+                model.objects.order_by("pk").all(), per_page=batch_size
+            )
+            for page_number in paginator.page_range:
+                page = paginator.get_page(page_number)
+                documents = [
+                    document_builder(obj.pk, obj.doc_to_hash())
+                    for obj in page.object_list
+                ]
+                if documents:
+                    response = client.collections[collection_name()].documents.import_(
+                        documents, {"action": "upsert"}
+                    )
+                    if not all(result["success"] for result in response):
+                        raise ValueError(
+                            f"Errors while importing documents to Typesense collection: {response}"
+                        )
+
+    def validate_indices(self) -> None:
+        """
+        Check if the indices exist and are valid.
+
+        Raise an exception if any do not exist or if any are not valid.
+        Note that the validation is lengthy,
+        because Typesense may add new keys to the schema.
+        This is fine - we only want to assert that keys we know about are set as expected.
+        There are also some fields in the retrieved schema we don't care about - eg. 'created_at'
+        """
+        client = get_typesense_client()
+        # cast to a wider type, because we want to use it in a more flexible way than TypedDict normally allows.
+        actual_schema = cast(
+            dict[str, Any], client.collections[collection_name()].retrieve()
+        )
+        expected_schema = expected_full_collection_schema()
+        errors: list[str] = []
+
+        expected_field_names = set(
+            map(lambda field: field["name"], expected_schema["fields"])
+        )
+        actual_field_names = set(
+            map(lambda field: field["name"], actual_schema["fields"])
+        )
+
+        if missing_fields := expected_field_names - actual_field_names:
+            errors.append(
+                f"ERROR: '{collection_name()}' collection schema 'fields' has missing field(s): {missing_fields}."
+            )
+
+        if extra_fields := actual_field_names - expected_field_names:
+            errors.append(
+                f"ERROR: '{collection_name()}' collection schema 'fields' "
+                f"has unexpected extra field(s): {extra_fields}."
+            )
+
+        if actual_field_names == expected_field_names:
+            for expected_field, actual_field in zip(
+                sorted(expected_schema["fields"], key=lambda field: field["name"]),
+                sorted(actual_schema["fields"], key=lambda field: field["name"]),
+            ):
+                for key, expected_value in expected_field.items():
+                    if expected_value != actual_field[key]:
+                        errors.append(
+                            f"ERROR: in collection '{collection_name()}' fields, field '{expected_field['name']}', "
+                            f"key '{key}' failed to validate. "
+                            f"Expected: '{expected_value}', actual '{actual_field[key]}'."
+                        )
+
+        for key, expected_value in expected_schema.items():
+            if key == "fields":
+                # we've already validated fields separately above
+                continue
+
+            if expected_value != actual_schema[key]:
+                errors.append(
+                    f"ERROR: in collection '{collection_name()}', key '{key}' failed to validate. "
+                    f"Expected: '{expected_value}', actual '{actual_schema[key]}'."
+                )
+
+        if errors:
+            for error in errors:
+                print(error)
+            raise AssertionError("\n".join(errors))
+
+    def refresh_indices(self) -> None:
+        """
+        Noop on Typesense, as all write API operations are synchronous.
+
+        See https://typesense.org/docs/guide/migrating-from-algolia.html#synchronous-write-apis for more information.
+        """
+        return None
+
+    def delete_unused_indices(self) -> int:
+        """
+        Noop for this implementation.
+        """
+        return 0
+
+
+class TypesenseThreadSearchBackend(BaseThreadSearchBackend):
+    """
+    Thread search backend implementation for Typesense.
+    """
+
+    def get_thread_ids(
+        self,
+        context: str,
+        # This argument is unsupported. Anyway, its only role was to boost some results,
+        # which did not have much effect because they are shuffled anyway downstream.
+        group_ids: list[int],
+        search_text: str,
+        # This parameter is unsupported, but as far as we know it's not used anywhere.
+        sort_criteria: Optional[list[dict[str, str]]] = None,
+        commentable_ids: Optional[list[str]] = None,
+        course_id: Optional[str] = None,
+    ) -> list[str]:
+        """
+        Retrieve thread IDs based on search criteria.
+        """
+        client = get_typesense_client()
+
+        params = build_search_parameters(
+            search_text=search_text,
+            course_id=course_id,
+            context=context,
+            commentable_ids=commentable_ids,
+        )
+
+        results = client.collections[collection_name()].documents.search(params)
+        thread_ids: set[str] = {
+            hit["document"]["thread_id"] for hit in results.get("hits", [])  # type: ignore
+        }
+        return list(thread_ids)
+
+    def get_suggested_text(self, search_text: str) -> Optional[str]:
+        """
+        Retrieve text suggestions for a given search query.
+
+        :param search_text: Text to search for suggestions
+        :return: Suggested text or None
+        """
+        # Not implemented, so no suggestions.
+        return None
+
+
+class TypesenseBackend(BaseSearchBackend):
+    """
+    Typesense-powered search backend.
+    """
+
+    DOCUMENT_SEARCH_CLASS = TypesenseDocumentBackend
+    INDEX_SEARCH_CLASS = TypesenseIndexBackend
+    THREAD_SEARCH_CLASS = TypesenseThreadSearchBackend
diff --git a/forum/settings/common.py b/forum/settings/common.py
index dbecd9cd..772fc9e7 100644
--- a/forum/settings/common.py
+++ b/forum/settings/common.py
@@ -10,7 +10,13 @@ def plugin_settings(settings: Any) -> None:
     Common settings for forum app
     """
     # Search backend
-    if getattr(settings, "MEILISEARCH_ENABLED", False):
+    if getattr(settings, "TYPESENSE_ENABLED", False):
+        settings.FORUM_SEARCH_BACKEND = getattr(
+            settings,
+            "FORUM_SEARCH_BACKEND",
+            "forum.search.typesense.TypesenseBackend",
+        )
+    elif getattr(settings, "MEILISEARCH_ENABLED", False):
         settings.FORUM_SEARCH_BACKEND = getattr(
             settings,
             "FORUM_SEARCH_BACKEND",
diff --git a/forum/settings/test.py b/forum/settings/test.py
index efc1a292..7e8f1631 100644
--- a/forum/settings/test.py
+++ b/forum/settings/test.py
@@ -79,3 +79,7 @@ def root(*args: str) -> str:
 MEILISEARCH_API_KEY = "MEILISEARCH_MASTER_KEY"
 
 USE_TZ = True
+
+TYPESENSE_COLLECTION_PREFIX = "forum_unittest_prefix_"
+TYPESENSE_API_KEY = "example-typesense-api-key"
+TYPESENSE_URLS = ["http://0.0.0.0:8108"]
diff --git a/requirements/base.in b/requirements/base.in
index 7fc1fced..311e9e10 100644
--- a/requirements/base.in
+++ b/requirements/base.in
@@ -11,4 +11,5 @@ requests
 pymongo
 elasticsearch
 edx-search # meilisearch backend
+typesense
 mysqlclient
diff --git a/requirements/base.txt b/requirements/base.txt
index 449bdd19..689a1d76 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -141,6 +141,7 @@ requests==2.32.5
     # via
     #   -r requirements/base.in
     #   meilisearch
+    #   typesense
 six==1.17.0
     # via
     #   edx-ccx-keys
@@ -157,6 +158,8 @@ stevedore==5.5.0
     #   edx-opaque-keys
 text-unidecode==1.3
     # via python-slugify
+typesense==1.1.1
+    # via -r requirements/base.in
 typing-extensions==4.15.0
     # via
     #   beautifulsoup4
diff --git a/requirements/ci.txt b/requirements/ci.txt
index 748a1284..66df4121 100644
--- a/requirements/ci.txt
+++ b/requirements/ci.txt
@@ -431,6 +431,7 @@ requests==2.32.5
     #   meilisearch
     #   requests-toolbelt
     #   twine
+    #   typesense
 requests-toolbelt==1.0.0
     # via
     #   -r requirements/quality.txt
@@ -507,6 +508,8 @@ types-urllib3==1.26.25.14
     # via
     #   -r requirements/quality.txt
     #   types-requests
+typesense==1.1.1
+    # via -r requirements/quality.txt
 typing-extensions==4.15.0
     # via
     #   -r requirements/quality.txt
diff --git a/requirements/dev.txt b/requirements/dev.txt
index 5e01b88a..14346e94 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -574,6 +574,7 @@ requests==2.32.5
     #   meilisearch
     #   requests-toolbelt
     #   twine
+    #   typesense
 requests-toolbelt==1.0.0
     # via
     #   -r requirements/ci.txt
@@ -672,6 +673,10 @@ types-urllib3==1.26.25.14
     #   -r requirements/ci.txt
     #   -r requirements/quality.txt
     #   types-requests
+typesense==1.1.1
+    # via
+    #   -r requirements/ci.txt
+    #   -r requirements/quality.txt
 typing-extensions==4.15.0
     # via
     #   -r requirements/ci.txt
diff --git a/requirements/doc.txt b/requirements/doc.txt
index e48ac9c0..71dbab6e 100644
--- a/requirements/doc.txt
+++ b/requirements/doc.txt
@@ -374,6 +374,7 @@ requests==2.32.5
     #   requests-toolbelt
     #   sphinx
     #   twine
+    #   typesense
 requests-toolbelt==1.0.0
     # via
     #   -r requirements/test.txt
@@ -446,6 +447,8 @@ tox==4.28.4
     # via -r requirements/test.txt
 twine==6.1.0
     # via -r requirements/test.txt
+typesense==1.1.1
+    # via -r requirements/test.txt
 typing-extensions==4.15.0
     # via
     #   -r requirements/test.txt
diff --git a/requirements/quality.txt b/requirements/quality.txt
index fe0a2766..15ac1f9a 100644
--- a/requirements/quality.txt
+++ b/requirements/quality.txt
@@ -403,6 +403,7 @@ requests==2.32.5
     #   meilisearch
     #   requests-toolbelt
     #   twine
+    #   typesense
 requests-toolbelt==1.0.0
     # via
     #   -r requirements/test.txt
@@ -470,6 +471,8 @@ types-requests==2.31.0.6
     #   djangorestframework-stubs
 types-urllib3==1.26.25.14
     # via types-requests
+typesense==1.1.1
+    # via -r requirements/test.txt
 typing-extensions==4.15.0
     # via
     #   -r requirements/test.txt
diff --git a/requirements/test.txt b/requirements/test.txt
index 87b4932d..6fd4abe8 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -305,6 +305,7 @@ requests==2.32.5
     #   meilisearch
     #   requests-toolbelt
     #   twine
+    #   typesense
 requests-toolbelt==1.0.0
     # via twine
 rfc3986==2.0.0
@@ -343,6 +344,8 @@ tox==4.28.4
     # via -r requirements/test.in
 twine==6.1.0
     # via -r requirements/test.in
+typesense==1.1.1
+    # via -r requirements/base.txt
 typing-extensions==4.15.0
     # via
     #   -r requirements/base.txt
diff --git a/tests/test_settings.py b/tests/test_settings.py
new file mode 100644
index 00000000..ee44defe
--- /dev/null
+++ b/tests/test_settings.py
@@ -0,0 +1,18 @@
+"""
+Test functionality used in settings.
+"""
+
+from unittest.mock import MagicMock
+
+from forum.settings.production import plugin_settings
+
+
+def test_plugin_settings_typesense() -> None:
+    settings = MagicMock(
+        spec=["TYPESENSE_ENABLED", "FEATURES"], TYPESENSE_ENABLED=True, FEATURES={}
+    )
+
+    # This function changes settings in-place.
+    plugin_settings(settings)
+
+    assert settings.FORUM_SEARCH_BACKEND == "forum.search.typesense.TypesenseBackend"
diff --git a/tests/test_typesense.py b/tests/test_typesense.py
new file mode 100644
index 00000000..16da561e
--- /dev/null
+++ b/tests/test_typesense.py
@@ -0,0 +1,249 @@
+"""
+Unit tests for the typesense search backend.
+"""
+
+from unittest.mock import patch, MagicMock, Mock
+
+import pytest
+from typesense.exceptions import ObjectNotFound
+
+from forum.search import typesense
+from forum import constants
+
+
+def test_quote_filter() -> None:
+    """Verify quoting unsafe filter values works as expected."""
+    assert typesense.quote_filter_value("foo || true") == "`foo || true`"
+    assert typesense.quote_filter_value("foo` || true") == "`foo || true`"
+    assert typesense.quote_filter_value("mal`formed word[,]") == "`malformed word[,]`"
+
+
+def test_build_collection_name() -> None:
+    assert typesense.collection_name() == "forum_unittest_prefix_forum"
+
+
+def test_schemas() -> None:
+    """
+    A very basic test to check the schema functions don't crash.
+    The contents are not checked.
+    """
+    assert isinstance(typesense.collection_schema(), dict)
+    assert isinstance(typesense.expected_full_collection_schema(), dict)
+
+
+def test_document_from_thread() -> None:
+    doc_id = "MY_ID"
+    data = {
+        "course_id": "course-v1:OpenedX+DemoX+DemoCourse",
+        "commentable_id": 4,
+        "context": "course",
+        "title": "My Thoughts!",
+        "body": "<p><b>Thought one</b>: I like this course.</p>",
+    }
+
+    expected_document = {
+        "id": "thread-MY_ID",
+        "thread_id": "MY_ID",
+        "course_id": "course-v1:OpenedX+DemoX+DemoCourse",
+        "commentable_id": "4",
+        "context": "course",
+        "text": "My Thoughts!\nThought one: I like this course.",
+    }
+
+    assert typesense.document_from_thread(doc_id, data) == expected_document
+
+
+def test_document_from_comment() -> None:
+    doc_id = "MY_ID"
+    data = {
+        "course_id": "course-v1:OpenedX+DemoX+DemoCourse",
+        "comment_thread_id": 6,
+        "context": "course",
+        "body": "<p><b>Another thought</b>: I also like this course.</p>",
+    }
+
+    expected_document = {
+        "id": "comment-MY_ID",
+        "thread_id": "6",
+        "course_id": "course-v1:OpenedX+DemoX+DemoCourse",
+        "commentable_id": "",
+        "context": "course",
+        "text": "Another thought: I also like this course.",
+    }
+
+    assert typesense.document_from_comment(doc_id, data) == expected_document
+
+
+@patch("forum.search.typesense.get_typesense_client")
+def test_search_threads(mock_get_client: Mock) -> None:
+    mock_client = MagicMock()
+    mock_get_client.return_value = mock_client
+    mock_search = mock_client.collections[
+        "forum_unittest_prefix_forum"
+    ].documents.search
+    mock_search.return_value = {
+        "hits": [{"document": {"thread_id": "ONE"}}, {"document": {"thread_id": "TWO"}}]
+    }
+
+    backend = typesense.TypesenseThreadSearchBackend()
+    assert sorted(
+        backend.get_thread_ids(
+            context="course",
+            group_ids=[],
+            search_text="thoughts",
+            commentable_ids=["4", "7[`||"],
+            course_id="course-v1:OpenedX+DemoX+DemoCourse",
+        )
+    ) == sorted(["ONE", "TWO"])
+
+    # test build_search_parameters() here too; important to verify it backtick escapes the values
+    expected_params = {
+        "q": "thoughts",
+        "query_by": "text",
+        "filter_by": "context:`course` && commentable_id:[`4`, `7[||`] "
+        "&& course_id:=`course-v1:OpenedX+DemoX+DemoCourse`",
+        "per_page": constants.FORUM_MAX_DEEP_SEARCH_COMMENT_COUNT,
+    }
+    mock_search.assert_called_once_with(expected_params)
+
+    # suggested text is not supported; always returns None
+    assert backend.get_suggested_text("foo") is None
+
+
+@patch("forum.search.typesense.get_typesense_client")
+def test_index_comment_document(mock_get_client: Mock) -> None:
+    mock_client = MagicMock()
+    mock_get_client.return_value = mock_client
+    mock_index = mock_client.collections["forum_unittest_prefix_forum"].documents.upsert
+
+    doc_id = "MY_ID"
+    data = {
+        "course_id": "course-v1:OpenedX+DemoX+DemoCourse",
+        "comment_thread_id": 6,
+        "context": "course",
+        "body": "<p><b>Another thought</b>: I also like this course.</p>",
+    }
+    expected_document = {
+        "id": "comment-MY_ID",
+        "thread_id": "6",
+        "course_id": "course-v1:OpenedX+DemoX+DemoCourse",
+        "commentable_id": "",
+        "context": "course",
+        "text": "Another thought: I also like this course.",
+    }
+
+    backend = typesense.TypesenseDocumentBackend()
+    backend.index_document("comments", doc_id, data)
+    mock_index.assert_called_once_with(expected_document)
+
+
+@patch("forum.search.typesense.get_typesense_client")
+def test_index_thread_document(mock_get_client: Mock) -> None:
+    mock_client = MagicMock()
+    mock_get_client.return_value = mock_client
+    mock_index = mock_client.collections["forum_unittest_prefix_forum"].documents.upsert
+
+    doc_id = "MY_ID"
+    data = {
+        "course_id": "course-v1:OpenedX+DemoX+DemoCourse",
+        "commentable_id": 4,
+        "context": "course",
+        "title": "My Thoughts!",
+        "body": "<p><b>Thought one</b>: I like this course.</p>",
+    }
+
+    expected_document = {
+        "id": "thread-MY_ID",
+        "thread_id": "MY_ID",
+        "course_id": "course-v1:OpenedX+DemoX+DemoCourse",
+        "commentable_id": "4",
+        "context": "course",
+        "text": "My Thoughts!\nThought one: I like this course.",
+    }
+
+    backend = typesense.TypesenseDocumentBackend()
+    backend.index_document("comment_threads", doc_id, data)
+    mock_index.assert_called_once_with(expected_document)
+
+
+@patch("forum.search.typesense.get_typesense_client", MagicMock())
+def test_index_invalid_type() -> None:
+    backend = typesense.TypesenseDocumentBackend()
+    with pytest.raises(NotImplementedError):
+        backend.index_document("foo", "DOCID", {})
+
+
+@patch("forum.search.typesense.get_typesense_client")
+def test_delete_document(mock_get_client: Mock) -> None:
+    mock_client = MagicMock()
+    mock_get_client.return_value = mock_client
+    mock_delete = (
+        mock_client.collections["forum_unittest_prefix_forum"]
+        .documents["comment-MYCOMMENTID"]
+        .delete
+    )
+
+    backend = typesense.TypesenseDocumentBackend()
+    backend.delete_document("comments", "MYCOMMENTID")
+    mock_delete.assert_called_once()
+
+
+@patch("forum.search.typesense.get_typesense_client")
+def test_init_indexes_already_exist(mock_get_client: Mock) -> None:
+    mock_client = MagicMock()
+    mock_get_client.return_value = mock_client
+    mock_collection = mock_client.collections["forum_unittest_prefix_forum"]
+    mock_collection.retrieve.return_value = {"data": "irrelevant but index exists"}
+
+    backend = typesense.TypesenseIndexBackend()
+    backend.initialize_indices()
+
+    mock_collection.delete.assert_not_called()
+    mock_client.collections.create.assert_not_called()
+
+
+@patch("forum.search.typesense.get_typesense_client")
+def test_init_indexes_already_exist_force(mock_get_client: Mock) -> None:
+    mock_client = MagicMock()
+    mock_get_client.return_value = mock_client
+    mock_collection = mock_client.collections["forum_unittest_prefix_forum"]
+    mock_collection.retrieve.return_value = {"data": "irrelevant but index exists"}
+
+    backend = typesense.TypesenseIndexBackend()
+    backend.initialize_indices(force_new_index=True)
+
+    mock_collection.delete.assert_called_once()
+    mock_client.collections.create.assert_called_once()
+
+
+@patch("forum.search.typesense.get_typesense_client")
+def test_init_indexes_does_not_exist(mock_get_client: Mock) -> None:
+    mock_client = MagicMock()
+    mock_get_client.return_value = mock_client
+    mock_collection = mock_client.collections["forum_unittest_prefix_forum"]
+    mock_collection.retrieve.side_effect = ObjectNotFound
+
+    backend = typesense.TypesenseIndexBackend()
+    backend.initialize_indices()
+
+    mock_collection.delete.assert_not_called()
+    mock_client.collections.create.assert_called_once()
+
+
+def test_index_noops() -> None:
+    """
+    These methods should have no effect and require no mocks.
+
+    They are noops on the Typesense backend.
+    """
+    backend = typesense.TypesenseIndexBackend()
+    backend.refresh_indices()
+    assert backend.delete_unused_indices() == 0
+
+
+def test_get_client() -> None:
+    client1 = typesense.get_typesense_client()
+    client2 = typesense.get_typesense_client()
+
+    assert client1 is client2
+    assert client1.config.api_key == "example-typesense-api-key"