Skip to content

Commit 1f94126

Browse files
committed
WIP ✨ (backend) Add document search view.
TODO : unit tests with oidc auth Signed-off-by: Fabre Florian <ffabre@hybird.org>
1 parent a01ade3 commit 1f94126

File tree

6 files changed

+275
-9
lines changed

6 files changed

+275
-9
lines changed

src/backend/core/api/serializers.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,3 +801,16 @@ class MoveDocumentSerializer(serializers.Serializer):
801801
choices=enums.MoveNodePositionChoices.choices,
802802
default=enums.MoveNodePositionChoices.LAST_CHILD,
803803
)
804+
805+
806+
class FindDocumentSerializer(serializers.Serializer):
    """Serializer validating the query parameters of a Find search request."""

    # Full-text query forwarded to the search indexer.
    q = serializers.CharField(required=True)

    def validate_q(self, value):
        """
        Ensure the search text is not empty or made of whitespace only.

        Returns the value unchanged when valid, raises a `ValidationError`
        otherwise.
        """
        if not value.strip():
            raise serializers.ValidationError("Text field cannot be empty.")

        return value

src/backend/core/api/viewsets.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from django.db.models.functions import Left, Length
2020
from django.http import Http404, StreamingHttpResponse
2121
from django.urls import reverse
22+
from django.utils.decorators import method_decorator
2223
from django.utils.functional import cached_property
2324
from django.utils.text import capfirst, slugify
2425
from django.utils.translation import gettext_lazy as _
@@ -29,6 +30,7 @@
2930
from csp.constants import NONE
3031
from csp.decorators import csp_update
3132
from lasuite.malware_detection import malware_detection
33+
from lasuite.oidc_login.decorators import refresh_oidc_access_token
3234
from rest_framework import filters, status, viewsets
3335
from rest_framework import response as drf_response
3436
from rest_framework.permissions import AllowAny
@@ -37,6 +39,7 @@
3739
from core import authentication, choices, enums, models
3840
from core.services.ai_services import AIService
3941
from core.services.collaboration_services import CollaborationService
42+
from core.services.search_indexers import FindDocumentIndexer
4043
from core.tasks.mail import send_ask_for_access_mail
4144
from core.utils import extract_attachments, filter_descendants
4245

@@ -48,6 +51,12 @@
4851
# pylint: disable=too-many-ancestors
4952

5053

54+
class ServiceUnavailable(drf.exceptions.APIException):
    """API exception answering with HTTP 503 and a generic "unavailable" detail."""

    status_code = status.HTTP_503_SERVICE_UNAVAILABLE
    default_detail = "Service unavailable."
    default_code = "service_unavailable"
58+
59+
5160
class NestedGenericViewSet(viewsets.GenericViewSet):
5261
"""
5362
A generic Viewset aims to be used in a nested route context.
@@ -367,6 +376,7 @@ class DocumentViewSet(
367376
list_serializer_class = serializers.ListDocumentSerializer
368377
trashbin_serializer_class = serializers.ListDocumentSerializer
369378
tree_serializer_class = serializers.ListDocumentSerializer
379+
search_serializer_class = serializers.ListDocumentSerializer
370380

371381
def get_queryset(self):
372382
"""Get queryset performing all annotation and filtering on the document tree structure."""
@@ -980,10 +990,32 @@ def duplicate(self, request, *args, **kwargs):
980990
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
981991
)
982992

983-
# TODO
984-
# @drf.decorators.action(detail=False, methods=["get"])
985-
# def search(self, request, *args, **kwargs):
986-
# index.search()
993+
@drf.decorators.action(detail=False, methods=["get"], url_path="search")
@method_decorator(refresh_oidc_access_token)
def search(self, request, *args, **kwargs):
    """
    Search documents through the Find indexer.

    The `q` query parameter is mandatory and validated with
    `FindDocumentSerializer` (a 400 response is returned when it is missing
    or empty). The OIDC access token stored in the session is forwarded to
    the indexer together with the requesting user.

    Raises `ServiceUnavailable` (HTTP 503) when the indexer raises a
    `RuntimeError`, e.g. when SEARCH_INDEXER_QUERY_URL is not configured.
    """
    serializer = serializers.FindDocumentSerializer(data=request.query_params)
    serializer.is_valid(raise_exception=True)

    access_token = request.session.get("oidc_access_token")

    try:
        queryset = FindDocumentIndexer().search(
            # "q" is required, so it is always present once validation passed.
            text=serializer.validated_data["q"],
            user=request.user,
            token=access_token,
        )
    except RuntimeError as err:
        # Keep the original error as the cause so it shows up in tracebacks/logs.
        raise ServiceUnavailable() from err

    return self.get_response_for_queryset(
        queryset,
        context={"request": request},
    )
9871019

9881020
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
9891021
def versions_list(self, request, *args, **kwargs):

src/backend/core/services/search_indexers.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,6 @@ def search(self, text, user, token):
135135
"services": ["docs"],
136136
}, token=token)
137137

138-
print(response)
139-
140138
return self.format_response(response)
141139

142140
@abstractmethod
@@ -207,7 +205,7 @@ def search_query(self, data, token) -> requests.Response:
207205

208206
if not url:
209207
raise RuntimeError(
210-
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before indexing."
208+
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before search."
211209
)
212210

213211
try:
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
"""
2+
Tests for Documents API endpoint in impress's core app: search
3+
"""
4+
import responses
5+
6+
import pytest
7+
from faker import Faker
8+
from rest_framework.test import APIClient
9+
10+
from core import factories, models
11+
12+
fake = Faker()
13+
pytestmark = pytest.mark.django_db
14+
15+
16+
@pytest.mark.parametrize("role", models.LinkRoleChoices.values)
@pytest.mark.parametrize("reach", models.LinkReachChoices.values)
@responses.activate
def test_api_documents_search_anonymous(reach, role, settings):
    """
    Anonymous users searching documents should get an empty result list
    whatever the link reach and link role of the existing documents.
    """
    settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"

    factories.DocumentFactory(link_reach=reach, link_role=role)

    # Stubbed Find response: the indexer returns no hit.
    responses.add(
        responses.POST,
        "http://find/api/v1.0/search",
        json=[],
        status=200,
    )

    response = APIClient().get("/api/v1.0/documents/search/", data={"q": "alpha"})

    assert response.status_code == 200
    assert response.json() == {
        "count": 0,
        "next": None,
        "previous": None,
        "results": [],
    }
46+
47+
48+
def test_api_documents_search_endpoint_is_none(settings):
    """A missing SEARCH_INDEXER_QUERY_URL should result in a 503 response."""
    settings.SEARCH_INDEXER_QUERY_URL = None

    user = factories.UserFactory()

    client = APIClient()
    client.force_login(user)

    # Use the logged-in client: a fresh APIClient() would send an anonymous request.
    response = client.get("/api/v1.0/documents/search/", data={"q": "alpha"})

    assert response.status_code == 503
    assert response.json() == {"detail": "Service unavailable."}
63+
64+
65+
@responses.activate
def test_api_documents_search_invalid_params(settings):
    """A search request without the mandatory `q` parameter should be rejected."""
    settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"

    user = factories.UserFactory()

    client = APIClient()
    client.force_login(user)

    # Use the logged-in client: a fresh APIClient() would send an anonymous request.
    response = client.get("/api/v1.0/documents/search/")

    assert response.status_code == 400
    assert response.json() == {"q": ["This field is required."]}
81+
82+
83+
@responses.activate
def test_api_documents_search_format(settings):
    """Check the exact payload returned by the search view for a single hit."""
    settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"

    user = factories.UserFactory()

    client = APIClient()
    client.force_login(user)

    user_a, user_b, user_c = factories.UserFactory.create_batch(3)
    document = factories.DocumentFactory(
        title="alpha",
        users=(user_a, user_c),
        link_traces=(user, user_b),
    )
    access = factories.UserDocumentAccessFactory(document=document, user=user)

    # Stubbed Find response: the indexer returns the document created above.
    find_hits = [{"_id": str(document.pk)}]
    responses.add(
        responses.POST,
        "http://find/api/v1.0/search",
        json=find_hits,
        status=200,
    )
    response = client.get("/api/v1.0/documents/search/", data={"q": "alpha"})

    assert response.status_code == 200

    # Split the pagination envelope from the result list.
    content = response.json()
    results = content.pop("results")
    assert content == {
        "count": 1,
        "next": None,
        "previous": None,
    }

    expected_document = {
        "id": str(document.id),
        "abilities": document.get_abilities(user),
        "ancestors_link_reach": None,
        "ancestors_link_role": None,
        "computed_link_reach": document.computed_link_reach,
        "computed_link_role": document.computed_link_role,
        "created_at": document.created_at.isoformat().replace("+00:00", "Z"),
        "creator": str(document.creator.id),
        "depth": 1,
        "excerpt": document.excerpt,
        "link_reach": document.link_reach,
        "link_role": document.link_role,
        "nb_accesses_ancestors": 3,
        "nb_accesses_direct": 3,
        "numchild": 0,
        "path": document.path,
        "title": document.title,
        "updated_at": document.updated_at.isoformat().replace("+00:00", "Z"),
        "user_role": access.role,
    }
    assert len(results) == 1
    assert results[0] == expected_document

src/backend/core/tests/test_services_search_indexers.py

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
"""Tests for Documents search indexers"""
22

3+
from functools import partial
34
from unittest.mock import patch
45

56
import pytest
67

7-
from core import factories, utils
8-
from core.services.search_indexers import FindDocumentIndexer
8+
from django.contrib.auth.models import AnonymousUser
9+
10+
from core import factories, models, utils
11+
from core.services.search_indexers import FindDocumentIndexer, get_visited_document_ids_of
912

1013
pytestmark = pytest.mark.django_db
1114

@@ -258,3 +261,79 @@ def test_push_uses_correct_url_and_data(mock_post, settings):
258261
assert args[0] == settings.SEARCH_INDEXER_URL
259262
assert kwargs.get("json") == sample_data
260263
assert kwargs.get("timeout") == 10
264+
265+
266+
def test_get_visited_document_ids_of():
    """
    get_visited_document_ids_of() should list the documents the user has
    visited, leaving out those on which the user has a specific access.
    """
    user = factories.UserFactory()
    other = factories.UserFactory()

    # Nothing has been visited yet, neither anonymously nor by the user.
    assert get_visited_document_ids_of(AnonymousUser()) == []
    assert get_visited_document_ids_of(user) == []

    doc1, doc2, _ = factories.DocumentFactory.create_batch(3)

    # Record a visit of the first two documents; the third one stays unvisited.
    for visited in (doc1, doc2):
        models.LinkTrace.objects.create(user=user, is_masked=False, document=visited)

    expected_ids = sorted([str(doc1.pk), str(doc2.pk)])
    assert sorted(get_visited_document_ids_of(user)) == expected_ids

    factories.UserDocumentAccessFactory(user=other, document=doc1)
    factories.UserDocumentAccessFactory(user=user, document=doc2)

    # The user now has an access on the second document: only the first one remains.
    assert get_visited_document_ids_of(user) == [str(doc1.pk)]
293+
294+
295+
@patch("requests.post")
def test_services_search_indexers_search(mock_post, settings):
    """
    search() should POST the query text, the ids of the documents visited by
    the user and the service name to SEARCH_INDEXER_QUERY_URL, authenticated
    with the given bearer token.
    """
    # The setting defaults to None, which makes the indexer raise RuntimeError.
    settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"

    user = factories.UserFactory()
    indexer = FindDocumentIndexer()

    mock_response = mock_post.return_value
    mock_response.raise_for_status.return_value = None  # No error

    doc1, doc2, _ = factories.DocumentFactory.create_batch(3)

    create_link = partial(models.LinkTrace.objects.create, user=user, is_masked=False)

    create_link(document=doc1)
    create_link(document=doc2)

    indexer.search("alpha", user=user, token="mytoken")

    args, kwargs = mock_post.call_args

    assert args[0] == settings.SEARCH_INDEXER_QUERY_URL

    query_data = kwargs.get("json")
    assert query_data["q"] == "alpha"
    assert sorted(query_data["visited"]) == sorted([str(doc1.pk), str(doc2.pk)])
    assert query_data["services"] == ["docs"]

    assert kwargs.get("headers") == {"Authorization": "Bearer mytoken"}
    assert kwargs.get("timeout") == 10
323+
324+
325+
def test_search_query_raises_error_if_search_endpoint_is_none(settings):
    """
    Indexer should raise RuntimeError if SEARCH_INDEXER_QUERY_URL is None or empty.
    """
    settings.SEARCH_INDEXER_QUERY_URL = None
    indexer = FindDocumentIndexer()
    user = factories.UserFactory()

    with pytest.raises(RuntimeError) as exc_info:
        indexer.search("alpha", user=user, token="mytoken")

    # Must match the message raised by search_query(), which ends with "before search."
    assert (
        "SEARCH_INDEXER_QUERY_URL must be set in Django settings before search."
        in str(exc_info.value)
    )

src/backend/impress/settings.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ class Base(Configuration):
109109
SEARCH_INDEXER_SECRET = values.Value(
110110
default=None, environ_name="SEARCH_INDEXER_SECRET", environ_prefix=None
111111
)
112+
SEARCH_INDEXER_QUERY_URL = values.Value(
113+
default=None, environ_name="SEARCH_INDEXER_QUERY_URL", environ_prefix=None
114+
)
112115

113116
# Static files (CSS, JavaScript, Images)
114117
STATIC_URL = "/static/"

0 commit comments

Comments
 (0)