def quote_filter_value(value: str) -> str:
    """
    Wrap a value in backticks so it can be embedded in a Typesense filter.

    Any backtick already present in the value is dropped first, so the value
    cannot close the quoting early.

    https://typesense.org/docs/guide/tips-for-filtering.html#escaping-special-characters
    """
    without_backticks = "".join(value.split("`"))
    return f"`{without_backticks}`"
def expected_full_collection_schema() -> dict[str, Any]:
    """
    Describe the complete collection schema we expect the server to report.

    This is the reference used when validating the schema retrieved from
    Typesense. The server may report additional keys that are not listed
    here; that is acceptable and validation should still pass.
    """
    # Attributes that every field in the collection is expected to share.
    shared_field_attributes = {
        "facet": False,
        "index": True,
        "infix": False,
        "locale": "",
        "optional": False,
        "sort": False,
        "stem": False,
        "stem_dictionary": "",
        "store": True,
        "type": "string",
    }
    field_names = ("thread_id", "course_id", "commentable_id", "context", "text")

    return {
        "default_sorting_field": "",
        "enable_nested_fields": False,
        "fields": [
            {**shared_field_attributes, "name": field_name}
            for field_name in field_names
        ],
        "name": collection_name(),
        "symbols_to_index": [],
        "token_separators": [],
    }
def build_search_parameters(
    *,
    search_text: str,
    course_id: str | None,
    context: str,
    commentable_ids: list[str] | None,
) -> SearchParameters:
    """
    Build Typesense search parameters for searching the index.

    The result always filters on ``context``. Filters on ``commentable_id``
    and ``course_id`` are added only when the matching argument is non-empty.
    All filter clauses are joined with ``&&``, and every filter value is
    backtick-quoted via ``quote_filter_value``.

    :param search_text: Text to look for in the indexed ``text`` field.
    :param course_id: If set, only match documents with exactly this course ID.
    :param context: Only match documents with this context.
    :param commentable_ids: If non-empty, only match documents whose
        ``commentable_id`` is one of these values.
    :return: Parameters to pass to the Typesense document search call.
    """
    # `context` is always a single word,
    # so we can gain performance without losing accuracy by using the faster `:` (non-exact) operator.
    # See https://typesense.org/docs/29.0/api/search.html#filter-parameters for more information.
    filters = [f"context:{quote_filter_value(context)}"]

    if commentable_ids:
        safe_ids = ", ".join(quote_filter_value(value) for value in commentable_ids)
        # The filter must name the field exactly as the collection schema
        # declares it: `commentable_id` (singular). The argument is plural
        # only because it carries several candidate values for that one field.
        filters.append(f"commentable_id:[{safe_ids}]")

    if course_id:
        # Exact match (`:=`) for the course ID.
        filters.append(f"course_id:={quote_filter_value(course_id)}")

    return {
        "q": search_text,
        "query_by": "text",
        "filter_by": " && ".join(filters),
        "per_page": FORUM_MAX_DEEP_SEARCH_COMMENT_COUNT,
    }
def index_document(
    self, index_name: str, doc_id: str | int, document: dict[str, Any]
) -> None:
    """
    Upsert a single thread or comment into the Typesense collection.

    `index_name` selects how the Typesense document is built:
    "comments" for a comment, "comment_threads" for a thread.
    Any other value raises NotImplementedError.
    """
    client = get_typesense_client()

    # Map each supported index name to the function that builds its document.
    document_builders = {
        "comments": document_from_comment,
        "comment_threads": document_from_thread,
    }
    build_document = document_builders.get(index_name)
    if build_document is None:
        raise NotImplementedError(f"unknown index name: {index_name}")

    typesense_document = build_document(doc_id, document)
    client.collections[collection_name()].documents.upsert(typesense_document)
def rebuild_indices(
    self, batch_size: int = 500, extra_catchup_minutes: int = 5
) -> None:
    """
    Recreate the Typesense collection and reindex every thread and comment.

    The collection is re-initialized with `force_new_index=True`, then threads
    and comments are read from the MySQL models in pages of `batch_size`
    (ordered by primary key) and imported with the "upsert" action.

    Only MySQL-backed instances are supported.
    Note that the `extra_catchup_minutes` argument is ignored.

    Raises ValueError if any document in a batch is not imported successfully.
    """
    client = get_typesense_client()
    self.initialize_indices(force_new_index=True)

    sources = (
        (CommentThread, document_from_thread),
        (Comment, document_from_comment),
    )
    for model_class, to_document in sources:
        pages = Paginator(
            model_class.objects.order_by("pk").all(), per_page=batch_size
        )
        for number in pages.page_range:
            batch = [
                to_document(record.pk, record.doc_to_hash())
                for record in pages.get_page(number).object_list
            ]
            if not batch:
                # Nothing to import for this page.
                continue

            import_results = client.collections[collection_name()].documents.import_(
                batch, {"action": "upsert"}
            )
            if any(not outcome["success"] for outcome in import_results):
                raise ValueError(
                    f"Errors while importing documents to Typesense collection: {import_results}"
                )
'created_at' + """ + client = get_typesense_client() + # cast to a wider type, because we want to use it in a more flexible way than TypedDict normally allows. + actual_schema = cast( + dict[str, Any], client.collections[collection_name()].retrieve() + ) + expected_schema = expected_full_collection_schema() + errors: list[str] = [] + + expected_field_names = set( + map(lambda field: field["name"], expected_schema["fields"]) + ) + actual_field_names = set( + map(lambda field: field["name"], actual_schema["fields"]) + ) + + if missing_fields := expected_field_names - actual_field_names: + errors.append( + f"ERROR: '{collection_name()}' collection schema 'fields' has missing field(s): {missing_fields}." + ) + + if extra_fields := actual_field_names - expected_field_names: + errors.append( + f"ERROR: '{collection_name()}' collection schema 'fields' " + f"has unexpected extra field(s): {extra_fields}." + ) + + if actual_field_names == expected_field_names: + for expected_field, actual_field in zip( + sorted(expected_schema["fields"], key=lambda field: field["name"]), + sorted(actual_schema["fields"], key=lambda field: field["name"]), + ): + for key, expected_value in expected_field.items(): + if expected_value != actual_field[key]: + errors.append( + f"ERROR: in collection '{collection_name()}' fields, field '{expected_field['name']}', " + f"key '{key}' failed to validate. " + f"Expected: '{expected_value}', actual '{actual_field[key]}'." + ) + + for key, expected_value in expected_schema.items(): + if key == "fields": + # we've already validated fields separately above + continue + + if expected_value != actual_schema[key]: + errors.append( + f"ERROR: in collection '{collection_name()}', key '{key}' failed to validate. " + f"Expected: '{expected_value}', actual '{actual_schema[key]}'." 
class TypesenseThreadSearchBackend(BaseThreadSearchBackend):
    """
    Typesense implementation of the thread search backend.
    """

    def get_thread_ids(
        self,
        context: str,
        # This argument is unsupported. Anyway, its only role was to boost some results,
        # which did not have much effect because they are shuffled anyway downstream.
        group_ids: list[int],
        search_text: str,
        # This parameter is unsupported, but as far as we know it's not used anywhere.
        sort_criteria: Optional[list[dict[str, str]]] = None,
        commentable_ids: Optional[list[str]] = None,
        course_id: Optional[str] = None,
    ) -> list[str]:
        """
        Search the collection and return the IDs of the matching threads.

        Each thread ID appears once in the result, even when several hits
        point at the same thread. `group_ids` and `sort_criteria` are
        accepted but not used.
        """
        client = get_typesense_client()

        search_parameters = build_search_parameters(
            search_text=search_text,
            course_id=course_id,
            context=context,
            commentable_ids=commentable_ids,
        )
        collection = client.collections[collection_name()]
        response = collection.documents.search(search_parameters)

        # Collect into a set so each thread is reported once.
        unique_thread_ids: set[str] = set()
        for hit in response.get("hits", []):
            unique_thread_ids.add(hit["document"]["thread_id"])  # type: ignore
        return list(unique_thread_ids)

    def get_suggested_text(self, search_text: str) -> Optional[str]:
        """
        Return a suggested replacement for the search text.

        Suggestions are not implemented for Typesense, so this is always None.

        :param search_text: Text to search for suggestions
        :return: Suggested text or None
        """
        return None
+ """ + + DOCUMENT_SEARCH_CLASS = TypesenseDocumentBackend + INDEX_SEARCH_CLASS = TypesenseIndexBackend + THREAD_SEARCH_CLASS = TypesenseThreadSearchBackend diff --git a/forum/settings/common.py b/forum/settings/common.py index dbecd9cd..772fc9e7 100644 --- a/forum/settings/common.py +++ b/forum/settings/common.py @@ -10,7 +10,13 @@ def plugin_settings(settings: Any) -> None: Common settings for forum app """ # Search backend - if getattr(settings, "MEILISEARCH_ENABLED", False): + if getattr(settings, "TYPESENSE_ENABLED", False): + settings.FORUM_SEARCH_BACKEND = getattr( + settings, + "FORUM_SEARCH_BACKEND", + "forum.search.typesense.TypesenseBackend", + ) + elif getattr(settings, "MEILISEARCH_ENABLED", False): settings.FORUM_SEARCH_BACKEND = getattr( settings, "FORUM_SEARCH_BACKEND", diff --git a/forum/settings/test.py b/forum/settings/test.py index efc1a292..7e8f1631 100644 --- a/forum/settings/test.py +++ b/forum/settings/test.py @@ -79,3 +79,7 @@ def root(*args: str) -> str: MEILISEARCH_API_KEY = "MEILISEARCH_MASTER_KEY" USE_TZ = True + +TYPESENSE_COLLECTION_PREFIX = "forum_unittest_prefix_" +TYPESENSE_API_KEY = "example-typesense-api-key" +TYPESENSE_URLS = ["http://0.0.0.0:8108"] diff --git a/requirements/base.in b/requirements/base.in index 7fc1fced..311e9e10 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -11,4 +11,5 @@ requests pymongo elasticsearch edx-search # meilisearch backend +typesense mysqlclient diff --git a/requirements/base.txt b/requirements/base.txt index 449bdd19..689a1d76 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -141,6 +141,7 @@ requests==2.32.5 # via # -r requirements/base.in # meilisearch + # typesense six==1.17.0 # via # edx-ccx-keys @@ -157,6 +158,8 @@ stevedore==5.5.0 # edx-opaque-keys text-unidecode==1.3 # via python-slugify +typesense==1.1.1 + # via -r requirements/base.in typing-extensions==4.15.0 # via # beautifulsoup4 diff --git a/requirements/ci.txt b/requirements/ci.txt index 
748a1284..66df4121 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -431,6 +431,7 @@ requests==2.32.5 # meilisearch # requests-toolbelt # twine + # typesense requests-toolbelt==1.0.0 # via # -r requirements/quality.txt @@ -507,6 +508,8 @@ types-urllib3==1.26.25.14 # via # -r requirements/quality.txt # types-requests +typesense==1.1.1 + # via -r requirements/quality.txt typing-extensions==4.15.0 # via # -r requirements/quality.txt diff --git a/requirements/dev.txt b/requirements/dev.txt index 5e01b88a..14346e94 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -574,6 +574,7 @@ requests==2.32.5 # meilisearch # requests-toolbelt # twine + # typesense requests-toolbelt==1.0.0 # via # -r requirements/ci.txt @@ -672,6 +673,10 @@ types-urllib3==1.26.25.14 # -r requirements/ci.txt # -r requirements/quality.txt # types-requests +typesense==1.1.1 + # via + # -r requirements/ci.txt + # -r requirements/quality.txt typing-extensions==4.15.0 # via # -r requirements/ci.txt diff --git a/requirements/doc.txt b/requirements/doc.txt index e48ac9c0..71dbab6e 100644 --- a/requirements/doc.txt +++ b/requirements/doc.txt @@ -374,6 +374,7 @@ requests==2.32.5 # requests-toolbelt # sphinx # twine + # typesense requests-toolbelt==1.0.0 # via # -r requirements/test.txt @@ -446,6 +447,8 @@ tox==4.28.4 # via -r requirements/test.txt twine==6.1.0 # via -r requirements/test.txt +typesense==1.1.1 + # via -r requirements/test.txt typing-extensions==4.15.0 # via # -r requirements/test.txt diff --git a/requirements/quality.txt b/requirements/quality.txt index fe0a2766..15ac1f9a 100644 --- a/requirements/quality.txt +++ b/requirements/quality.txt @@ -403,6 +403,7 @@ requests==2.32.5 # meilisearch # requests-toolbelt # twine + # typesense requests-toolbelt==1.0.0 # via # -r requirements/test.txt @@ -470,6 +471,8 @@ types-requests==2.31.0.6 # djangorestframework-stubs types-urllib3==1.26.25.14 # via types-requests +typesense==1.1.1 + # via -r requirements/test.txt 
def test_plugin_settings_typesense() -> None:
    """Enabling Typesense makes plugin_settings select the Typesense backend."""
    # `spec` restricts the mock to these two attributes, so
    # FORUM_SEARCH_BACKEND is absent until plugin_settings assigns it.
    fake_settings = MagicMock(
        spec=["TYPESENSE_ENABLED", "FEATURES"],
        TYPESENSE_ENABLED=True,
        FEATURES={},
    )

    # plugin_settings mutates the settings object it is given.
    plugin_settings(fake_settings)

    expected_backend = "forum.search.typesense.TypesenseBackend"
    assert fake_settings.FORUM_SEARCH_BACKEND == expected_backend
def test_quote_filter() -> None:
    """Check that unsafe filter values are backtick-quoted with inner backticks removed."""
    quoted_by_raw_value = {
        "foo || true": "`foo || true`",
        "foo` || true": "`foo || true`",
        "mal`formed word[,]": "`malformed word[,]`",
    }
    for raw_value, quoted in quoted_by_raw_value.items():
        assert typesense.quote_filter_value(raw_value) == quoted

Thought one: I like this course.

", + } + + expected_document = { + "id": "thread-MY_ID", + "thread_id": "MY_ID", + "course_id": "course-v1:OpenedX+DemoX+DemoCourse", + "commentable_id": "4", + "context": "course", + "text": "My Thoughts!\nThought one: I like this course.", + } + + assert typesense.document_from_thread(doc_id, data) == expected_document + + +def test_document_from_comment() -> None: + doc_id = "MY_ID" + data = { + "course_id": "course-v1:OpenedX+DemoX+DemoCourse", + "comment_thread_id": 6, + "context": "course", + "body": "

Another thought: I also like this course.

@patch("forum.search.typesense.get_typesense_client")
def test_search_threads(mock_get_client: Mock) -> None:
    """
    Search through the thread backend against a mocked Typesense client.

    Verifies that the thread IDs of the mocked hits are returned, that the
    search call receives the expected parameters (including backtick-quoted
    filter values), and that suggested text is always None.
    """
    mock_client = MagicMock()
    mock_get_client.return_value = mock_client
    mock_search = mock_client.collections[
        "forum_unittest_prefix_forum"
    ].documents.search
    mock_search.return_value = {
        "hits": [{"document": {"thread_id": "ONE"}}, {"document": {"thread_id": "TWO"}}]
    }

    backend = typesense.TypesenseThreadSearchBackend()
    # The order of the returned IDs is not asserted, hence the sorting.
    assert sorted(
        backend.get_thread_ids(
            context="course",
            group_ids=[],
            search_text="thoughts",
            commentable_ids=["4", "7[`||"],
            course_id="course-v1:OpenedX+DemoX+DemoCourse",
        )
    ) == sorted(["ONE", "TWO"])

    # test build_search_parameters() here too; important to verify it backtick escapes the values
    # NOTE(review): the expected filter below names the field `commentable_ids`,
    # while the collection schema declares it as `commentable_id`. This string
    # pins whatever build_search_parameters() emits and must change with it.
    expected_params = {
        "q": "thoughts",
        "query_by": "text",
        "filter_by": "context:`course` && commentable_ids:[`4`, `7[||`] "
        "&& course_id:=`course-v1:OpenedX+DemoX+DemoCourse`",
        "per_page": constants.FORUM_MAX_DEEP_SEARCH_COMMENT_COUNT,
    }
    mock_search.assert_called_once_with(expected_params)

    # suggested text is not supported; always returns None
    assert backend.get_suggested_text("foo") is None

Another thought: I also like this course.

", + } + expected_document = { + "id": "comment-MY_ID", + "thread_id": "6", + "course_id": "course-v1:OpenedX+DemoX+DemoCourse", + "commentable_id": "", + "context": "course", + "text": "Another thought: I also like this course.", + } + + backend = typesense.TypesenseDocumentBackend() + backend.index_document("comments", doc_id, data) + mock_index.assert_called_once_with(expected_document) + + +@patch("forum.search.typesense.get_typesense_client") +def test_index_thread_document(mock_get_client: Mock) -> None: + mock_client = MagicMock() + mock_get_client.return_value = mock_client + mock_index = mock_client.collections["forum_unittest_prefix_forum"].documents.upsert + + doc_id = "MY_ID" + data = { + "course_id": "course-v1:OpenedX+DemoX+DemoCourse", + "commentable_id": 4, + "context": "course", + "title": "My Thoughts!", + "body": "

Thought one: I like this course.

", + } + + expected_document = { + "id": "thread-MY_ID", + "thread_id": "MY_ID", + "course_id": "course-v1:OpenedX+DemoX+DemoCourse", + "commentable_id": "4", + "context": "course", + "text": "My Thoughts!\nThought one: I like this course.", + } + + backend = typesense.TypesenseDocumentBackend() + backend.index_document("comment_threads", doc_id, data) + mock_index.assert_called_once_with(expected_document) + + +@patch("forum.search.typesense.get_typesense_client", MagicMock()) +def test_index_invalid_type() -> None: + backend = typesense.TypesenseDocumentBackend() + with pytest.raises(NotImplementedError): + backend.index_document("foo", "DOCID", {}) + + +@patch("forum.search.typesense.get_typesense_client") +def test_delete_document(mock_get_client: Mock) -> None: + mock_client = MagicMock() + mock_get_client.return_value = mock_client + mock_delete = ( + mock_client.collections["forum_unittest_prefix_forum"] + .documents["comment-MYCOMMENTID"] + .delete + ) + + backend = typesense.TypesenseDocumentBackend() + backend.delete_document("comments", "MYCOMMENTID") + mock_delete.assert_called_once() + + +@patch("forum.search.typesense.get_typesense_client") +def test_init_indexes_already_exist(mock_get_client: Mock) -> None: + mock_client = MagicMock() + mock_get_client.return_value = mock_client + mock_collection = mock_client.collections["forum_unittest_prefix_forum"] + mock_collection.retrieve.return_value = {"data": "irrelevant but index exists"} + + backend = typesense.TypesenseIndexBackend() + backend.initialize_indices() + + mock_collection.delete.assert_not_called() + mock_client.collections.create.assert_not_called() + + +@patch("forum.search.typesense.get_typesense_client") +def test_init_indexes_already_exist_force(mock_get_client: Mock) -> None: + mock_client = MagicMock() + mock_get_client.return_value = mock_client + mock_collection = mock_client.collections["forum_unittest_prefix_forum"] + mock_collection.retrieve.return_value = {"data": "irrelevant but 
def test_get_client() -> None:
    """The client factory returns one shared instance configured from settings."""
    first = typesense.get_typesense_client()
    second = typesense.get_typesense_client()

    # Same object both times: the client is created once and reused.
    assert second is first
    assert first.config.api_key == "example-typesense-api-key"