Skip to content

Commit 76a2bbd

Browse files
committed
✨(backend) keep ordering from fulltext search in results
Keep ordering by score from Find API on search/ results and fallback search still uses "-update_at" ordering as default Refactor pagination to work with a list instead of a queryset Signed-off-by: Fabre Florian <ffabre@hybird.org>
1 parent e816460 commit 76a2bbd

File tree

2 files changed

+245
-47
lines changed

2 files changed

+245
-47
lines changed

src/backend/core/api/viewsets.py

Lines changed: 33 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,38 +1071,58 @@ def duplicate(self, request, *args, **kwargs):
10711071
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
10721072
)
10731073

1074-
def _simple_search_queryset(self, params):
1074+
def _search_simple(self, request, text):
10751075
"""
10761076
Returns a queryset filtered by the content of the document title
10771077
"""
1078-
text = params.validated_data["q"]
1079-
10801078
# As the 'list' view we get a prefiltered queryset (deleted docs are excluded)
10811079
queryset = self.get_queryset()
10821080
filterset = DocumentFilter({"title": text}, queryset=queryset)
10831081

10841082
if not filterset.is_valid():
10851083
raise drf.exceptions.ValidationError(filterset.errors)
10861084

1087-
return filterset.filter_queryset(queryset)
1085+
queryset = filterset.filter_queryset(queryset)
1086+
1087+
return self.get_response_for_queryset(
1088+
queryset.order_by("-updated_at"),
1089+
context={
1090+
"request": request,
1091+
},
1092+
)
10881093

1089-
def _fulltext_search_queryset(self, indexer, token, user, params):
1094+
def _search_fulltext(self, indexer, request, params):
10901095
"""
10911096
Returns a queryset from the results the fulltext search of Find
10921097
"""
1098+
access_token = request.session.get("oidc_access_token")
1099+
user = request.user
10931100
text = params.validated_data["q"]
10941101
queryset = models.Document.objects.all()
10951102

10961103
# Retrieve the documents ids from Find.
10971104
results = indexer.search(
10981105
text=text,
1099-
token=token,
1106+
token=access_token,
11001107
visited=get_visited_document_ids_of(queryset, user),
1101-
page=params.validated_data.get("page", 1),
1102-
page_size=params.validated_data.get("page_size", 20),
1108+
page=1,
1109+
page_size=100,
11031110
)
11041111

1105-
return queryset.filter(pk__in=results)
1112+
docs_by_uuid = {str(d.pk): d for d in queryset.filter(pk__in=results)}
1113+
ordered_docs = [docs_by_uuid[id] for id in results]
1114+
1115+
page = self.paginate_queryset(ordered_docs)
1116+
1117+
serializer = self.get_serializer(
1118+
page if page else ordered_docs,
1119+
many=True,
1120+
context={
1121+
"request": request,
1122+
},
1123+
)
1124+
1125+
return self.get_paginated_response(serializer.data)
11061126

11071127
@drf.decorators.action(detail=False, methods=["get"], url_path="search")
11081128
@method_decorator(refresh_oidc_access_token)
@@ -1118,29 +1138,17 @@ def search(self, request, *args, **kwargs):
11181138
11191139
The ordering is always by the most recent first.
11201140
"""
1121-
access_token = request.session.get("oidc_access_token")
1122-
user = request.user
1123-
11241141
params = serializers.SearchDocumentSerializer(data=request.query_params)
11251142
params.is_valid(raise_exception=True)
11261143

11271144
indexer = get_document_indexer()
11281145

11291146
if indexer:
1130-
queryset = self._fulltext_search_queryset(
1131-
indexer, token=access_token, user=user, params=params
1132-
)
1133-
else:
1134-
# The indexer is not configured, we fallback on a simple icontains filter by the
1135-
# model field 'title'.
1136-
queryset = self._simple_search_queryset(params)
1147+
return self._search_fulltext(indexer, request, params=params)
11371148

1138-
return self.get_response_for_queryset(
1139-
queryset.order_by("-updated_at"),
1140-
context={
1141-
"request": request,
1142-
},
1143-
)
1149+
# The indexer is not configured, we fallback on a simple icontains filter by the
1150+
# model field 'title'.
1151+
return self._search_simple(request, text=params.validated_data["q"])
11441152

11451153
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
11461154
def versions_list(self, request, *args, **kwargs):

src/backend/core/tests/documents/test_api_documents_search.py

Lines changed: 212 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22
Tests for Documents API endpoint in impress's core app: list
33
"""
44

5+
import random
56
from json import loads as json_loads
67

8+
from django.test import RequestFactory
9+
710
import pytest
811
import responses
912
from faker import Faker
@@ -16,6 +19,15 @@
1619
pytestmark = pytest.mark.django_db
1720

1821

22+
def build_search_url(**kwargs):
23+
"""Build absolute uri for search endpoint with ORDERED query arguments"""
24+
return (
25+
RequestFactory()
26+
.get("/api/v1.0/documents/search/", dict(sorted(kwargs.items())))
27+
.build_absolute_uri()
28+
)
29+
30+
1931
@pytest.mark.parametrize("role", models.LinkRoleChoices.values)
2032
@pytest.mark.parametrize("reach", models.LinkReachChoices.values)
2133
@responses.activate
@@ -191,8 +203,62 @@ def test_api_documents_search_format(indexer_settings):
191203

192204

193205
@responses.activate
194-
def test_api_documents_search_pagination(indexer_settings):
195-
"""Documents should be ordered by descending "updated_at" by default"""
206+
@pytest.mark.parametrize(
207+
"pagination, status, expected",
208+
(
209+
(
210+
{"page": 1, "page_size": 10},
211+
200,
212+
{
213+
"count": 10,
214+
"previous": None,
215+
"next": None,
216+
"range": (0, None),
217+
},
218+
),
219+
(
220+
{},
221+
200,
222+
{
223+
"count": 10,
224+
"previous": None,
225+
"next": None,
226+
"range": (0, None),
227+
"api_page_size": 21, # default page_size is 20
228+
},
229+
),
230+
(
231+
{"page": 2, "page_size": 10},
232+
404,
233+
{},
234+
),
235+
(
236+
{"page": 1, "page_size": 5},
237+
200,
238+
{
239+
"count": 10,
240+
"previous": None,
241+
"next": {"page": 2, "page_size": 5},
242+
"range": (0, 5),
243+
},
244+
),
245+
(
246+
{"page": 2, "page_size": 5},
247+
200,
248+
{
249+
"count": 10,
250+
"previous": {"page_size": 5},
251+
"next": None,
252+
"range": (5, None),
253+
},
254+
),
255+
({"page": 3, "page_size": 5}, 404, {}),
256+
),
257+
)
258+
def test_api_documents_search_pagination(
259+
indexer_settings, pagination, status, expected
260+
):
261+
"""Documents should be ordered by descending "score" by default"""
196262
indexer_settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"
197263

198264
assert get_document_indexer() is not None
@@ -202,35 +268,159 @@ def test_api_documents_search_pagination(indexer_settings):
202268
client = APIClient()
203269
client.force_login(user)
204270

205-
docs = factories.DocumentFactory.create_batch(10)
271+
docs = factories.DocumentFactory.create_batch(10, title="alpha", users=[user])
272+
273+
docs_by_uuid = {str(doc.pk): doc for doc in docs}
274+
api_results = [{"_id": id} for id in docs_by_uuid.keys()]
275+
276+
# reorder randomly to simulate score ordering
277+
random.shuffle(api_results)
206278

207279
# Find response
208280
# pylint: disable-next=assignment-from-none
209281
api_search = responses.add(
210282
responses.POST,
211283
"http://find/api/v1.0/search",
212-
json=[{"_id": str(doc.pk)} for doc in docs],
284+
json=api_results,
213285
status=200,
214286
)
215287

216288
response = client.get(
217-
"/api/v1.0/documents/search/", data={"q": "alpha", "page": 2, "page_size": 5}
289+
"/api/v1.0/documents/search/",
290+
data={
291+
"q": "alpha",
292+
**pagination,
293+
},
218294
)
219295

220-
assert response.status_code == 200
221-
content = response.json()
222-
results = content.pop("results")
223-
assert len(results) == 5
224-
225-
# Check the query parameters.
226-
assert api_search.call_count == 1
227-
assert api_search.calls[0].response.status_code == 200
228-
assert json_loads(api_search.calls[0].request.body) == {
229-
"q": "alpha",
230-
"visited": [],
231-
"services": ["docs"],
232-
"page_number": 2,
233-
"page_size": 5,
234-
"order_by": "updated_at",
235-
"order_direction": "desc",
236-
}
296+
assert response.status_code == status
297+
298+
if response.status_code < 300:
299+
previous_url = (
300+
build_search_url(q="alpha", **expected["previous"])
301+
if expected["previous"]
302+
else None
303+
)
304+
next_url = (
305+
build_search_url(q="alpha", **expected["next"])
306+
if expected["next"]
307+
else None
308+
)
309+
start, end = expected["range"]
310+
311+
content = response.json()
312+
313+
assert content["count"] == expected["count"]
314+
assert content["previous"] == previous_url
315+
assert content["next"] == next_url
316+
317+
results = content.pop("results")
318+
319+
# The find api results ordering by score is kept
320+
assert [r["id"] for r in results] == [r["_id"] for r in api_results[start:end]]
321+
322+
# Check the query parameters.
323+
assert api_search.call_count == 1
324+
assert api_search.calls[0].response.status_code == 200
325+
assert json_loads(api_search.calls[0].request.body) == {
326+
"q": "alpha",
327+
"visited": [],
328+
"services": ["docs"],
329+
"page_number": 1,
330+
"page_size": 100,
331+
"order_by": "updated_at",
332+
"order_direction": "desc",
333+
}
334+
335+
336+
@responses.activate
337+
@pytest.mark.parametrize(
338+
"pagination, status, expected",
339+
(
340+
(
341+
{"page": 1, "page_size": 10},
342+
200,
343+
{"count": 10, "previous": None, "next": None, "range": (0, None)},
344+
),
345+
(
346+
{},
347+
200,
348+
{"count": 10, "previous": None, "next": None, "range": (0, None)},
349+
),
350+
(
351+
{"page": 2, "page_size": 10},
352+
404,
353+
{},
354+
),
355+
(
356+
{"page": 1, "page_size": 5},
357+
200,
358+
{
359+
"count": 10,
360+
"previous": None,
361+
"next": {"page": 2, "page_size": 5},
362+
"range": (0, 5),
363+
},
364+
),
365+
(
366+
{"page": 2, "page_size": 5},
367+
200,
368+
{
369+
"count": 10,
370+
"previous": {"page_size": 5},
371+
"next": None,
372+
"range": (5, None),
373+
},
374+
),
375+
({"page": 3, "page_size": 5}, 404, {}),
376+
),
377+
)
378+
def test_api_documents_search_pagination_endpoint_is_none(
379+
indexer_settings, pagination, status, expected
380+
):
381+
"""Documents should be ordered by descending "-updated_at" by default"""
382+
indexer_settings.SEARCH_INDEXER_QUERY_URL = None
383+
384+
assert get_document_indexer() is None
385+
386+
user = factories.UserFactory()
387+
388+
client = APIClient()
389+
client.force_login(user)
390+
391+
factories.DocumentFactory.create_batch(10, title="alpha", users=[user])
392+
393+
response = client.get(
394+
"/api/v1.0/documents/search/",
395+
data={
396+
"q": "alpha",
397+
**pagination,
398+
},
399+
)
400+
401+
assert response.status_code == status
402+
403+
if response.status_code < 300:
404+
previous_url = (
405+
build_search_url(q="alpha", **expected["previous"])
406+
if expected["previous"]
407+
else None
408+
)
409+
next_url = (
410+
build_search_url(q="alpha", **expected["next"])
411+
if expected["next"]
412+
else None
413+
)
414+
queryset = models.Document.objects.order_by("-updated_at")
415+
start, end = expected["range"]
416+
expected_results = [str(d.pk) for d in queryset[start:end]]
417+
418+
content = response.json()
419+
420+
assert content["count"] == expected["count"]
421+
assert content["previous"] == previous_url
422+
assert content["next"] == next_url
423+
424+
results = content.pop("results")
425+
426+
assert [r["id"] for r in results] == expected_results

0 commit comments

Comments
 (0)