Skip to content

Commit e966594

Browse files
committed
✨ (backend) Add document search view.
New API view that calls the indexed documents search view (resource server) of app "Find". Signed-off-by: Fabre Florian <ffabre@hybird.org>
1 parent 9a904cf commit e966594

File tree

10 files changed

+312
-34
lines changed

10 files changed

+312
-34
lines changed

env.d/development/common

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ LOGOUT_REDIRECT_URL=http://localhost:3000
4949
OIDC_REDIRECT_ALLOWED_HOSTS=["http://localhost:8083", "http://localhost:3000"]
5050
OIDC_AUTH_REQUEST_EXTRA_PARAMS={"acr_values": "eidas1"}
5151

52+
# Store OIDC tokens in the session
53+
OIDC_STORE_ACCESS_TOKEN = True # Store the access token in the session
54+
OIDC_STORE_REFRESH_TOKEN = True # Store the encrypted refresh token in the session
55+
OIDC_STORE_REFRESH_TOKEN_KEY = "uoJc422rSQjOXx6QIU5NOXSxeiycT47NrGQIBTWsjFU="
56+
5257
# AI
5358
AI_FEATURE_ENABLED=true
5459
AI_BASE_URL=https://openaiendpoint.com

src/backend/core/api/serializers.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,3 +821,17 @@ class MoveDocumentSerializer(serializers.Serializer):
821821
choices=enums.MoveNodePositionChoices.choices,
822822
default=enums.MoveNodePositionChoices.LAST_CHILD,
823823
)
824+
825+
826+
class FindDocumentSerializer(serializers.Serializer):
    """Serializer validating the query parameters of a Find search request."""

    # Search text; the parameter is mandatory.
    q = serializers.CharField(required=True)

    def validate_q(self, value):
        """
        Ensure the `q` parameter contains at least one non-whitespace character.

        Returns the value unchanged when it is valid and raises a
        `serializers.ValidationError` otherwise.
        """
        # An empty (or whitespace-only) string is falsy once stripped.
        if not value.strip():
            raise serializers.ValidationError("Text field cannot be empty.")

        return value

src/backend/core/api/viewsets.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from django.db.models.functions import Left, Length
2222
from django.http import Http404, StreamingHttpResponse
2323
from django.urls import reverse
24+
from django.utils.decorators import method_decorator
2425
from django.utils.functional import cached_property
2526
from django.utils.text import capfirst, slugify
2627
from django.utils.translation import gettext_lazy as _
@@ -31,6 +32,7 @@
3132
from csp.constants import NONE
3233
from csp.decorators import csp_update
3334
from lasuite.malware_detection import malware_detection
35+
from lasuite.oidc_login.decorators import refresh_oidc_access_token
3436
from rest_framework import filters, status, viewsets
3537
from rest_framework import response as drf_response
3638
from rest_framework.permissions import AllowAny
@@ -47,6 +49,7 @@
4749
from core.services.converter_services import (
4850
YdocConverter,
4951
)
52+
from core.services.search_indexers import FindDocumentIndexer
5053
from core.tasks.mail import send_ask_for_access_mail
5154
from core.utils import extract_attachments, filter_descendants
5255

@@ -367,6 +370,7 @@ class DocumentViewSet(
367370
list_serializer_class = serializers.ListDocumentSerializer
368371
trashbin_serializer_class = serializers.ListDocumentSerializer
369372
tree_serializer_class = serializers.ListDocumentSerializer
373+
search_serializer_class = serializers.ListDocumentSerializer
370374

371375
def get_queryset(self):
372376
"""Get queryset performing all annotation and filtering on the document tree structure."""
@@ -997,10 +1001,37 @@ def duplicate(self, request, *args, **kwargs):
9971001
{"id": str(duplicated_document.id)}, status=status.HTTP_201_CREATED
9981002
)
9991003

1000-
# TODO
1001-
# @drf.decorators.action(detail=False, methods=["get"])
1002-
# def search(self, request, *args, **kwargs):
1003-
# index.search()
1004+
@drf.decorators.action(detail=False, methods=["get"], url_path="search")
@method_decorator(refresh_oidc_access_token)
def search(self, request, *args, **kwargs):
    """
    Full text search of documents through the opensearch indexation app "find".

    The `q` query parameter is validated with `FindDocumentSerializer`, then the
    search is delegated to `FindDocumentIndexer` together with the requesting
    user and the OIDC access token read from the session. The queryset returned
    by the indexer is turned into a DRF response by `get_response_for_queryset`.

    A `RuntimeError` raised while building or querying the indexer is reported
    to the client as a 401 response.
    """
    oidc_token = request.session.get("oidc_access_token")

    # Reject the request early (400) when the query parameters are invalid.
    params = serializers.FindDocumentSerializer(data=request.query_params)
    params.is_valid(raise_exception=True)
    query_text = params.validated_data.get("q", "")

    try:
        documents = FindDocumentIndexer().search(
            text=query_text,
            user=request.user,
            token=oidc_token,
        )
    except RuntimeError:
        error_body = {"detail": "The service is not configured properly."}
        return drf.response.Response(
            error_body,
            status=status.HTTP_401_UNAUTHORIZED,
        )

    return self.get_response_for_queryset(documents, context={"request": request})
10041035

10051036
@drf.decorators.action(detail=True, methods=["get"], url_path="versions")
10061037
def versions_list(self, request, *args, **kwargs):

src/backend/core/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@
4141
RoleChoices,
4242
get_equivalent_link_definition,
4343
)
44-
from .validators import sub_validator
4544
from .tasks.find import trigger_document_indexer
45+
from .validators import sub_validator
4646

4747
logger = getLogger(__name__)
4848

src/backend/core/services/search_indexers.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -48,19 +48,19 @@ def get_batch_accesses_by_users_and_teams(paths):
4848

4949

5050
def get_visited_document_ids_of(user):
51+
"""
52+
Returns the ids of the documents that have a link trace for the user and on which the user has NO direct access.
53+
It will be used to limit the opensearch responses to the public documents already
54+
"visited" by the user.
55+
"""
5156
if isinstance(user, AnonymousUser):
5257
return []
5358

54-
# TODO : exclude links when user already have a specific access to the doc
55-
qs = models.LinkTrace.objects.filter(
56-
user=user
57-
).exclude(
59+
qs = models.LinkTrace.objects.filter(user=user).exclude(
5860
document__accesses__user=user,
5961
)
6062

61-
return list({
62-
str(id) for id in qs.values_list("document_id", flat=True)
63-
})
63+
return list({str(id) for id in qs.values_list("document_id", flat=True)})
6464

6565

6666
class BaseDocumentIndexer(ABC):
@@ -129,13 +129,14 @@ def search(self, text, user, token):
129129
"""
130130
visited_ids = get_visited_document_ids_of(user)
131131

132-
response = self.search_query(data={
133-
"q": text,
134-
"visited": visited_ids,
135-
"services": ["docs"],
136-
}, token=token)
137-
138-
print(response)
132+
response = self.search_query(
133+
data={
134+
"q": text,
135+
"visited": visited_ids,
136+
"services": ["docs"],
137+
},
138+
token=token,
139+
)
139140

140141
return self.format_response(response)
141142

@@ -207,7 +208,7 @@ def search_query(self, data, token) -> requests.Response:
207208

208209
if not url:
209210
raise RuntimeError(
210-
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before indexing."
211+
"SEARCH_INDEXER_QUERY_URL must be set in Django settings before search."
211212
)
212213

213214
try:
@@ -228,9 +229,7 @@ def format_response(self, data: dict):
228229
"""
229230
Retrieve documents ids from Find app response and return a queryset.
230231
"""
231-
return models.Document.objects.filter(pk__in=[
232-
d['_id'] for d in data
233-
])
232+
return models.Document.objects.filter(pk__in=[d["_id"] for d in data])
234233

235234
def push(self, data):
236235
"""

src/backend/core/tasks/find.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ def _aux():
8686

8787
logger.info(
8888
"Add task for document %s indexation in %.2f seconds",
89-
document.pk, countdown
89+
document.pk,
90+
countdown,
9091
)
9192

9293
# Each time this method is called during the countdown, we increment the

src/backend/core/tests/commands/test_index.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def test_index():
2121

2222
with transaction.atomic():
2323
doc = factories.DocumentFactory()
24-
empty_doc = factories.DocumentFactory(title=None, content='')
24+
empty_doc = factories.DocumentFactory(title=None, content="")
2525
no_title_doc = factories.DocumentFactory(title=None)
2626

2727
factories.UserDocumentAccessFactory(document=doc, user=user)
@@ -43,7 +43,10 @@ def sortkey(d):
4343
push_call_args = [call.args[0] for call in mock_push.call_args_list]
4444

4545
assert len(push_call_args) == 1 # called once but with a batch of docs
46-
assert sorted(push_call_args[0], key=sortkey) == sorted([
47-
indexer.serialize_document(doc, accesses),
48-
indexer.serialize_document(no_title_doc, accesses),
49-
], key=sortkey)
46+
assert sorted(push_call_args[0], key=sortkey) == sorted(
47+
[
48+
indexer.serialize_document(doc, accesses),
49+
indexer.serialize_document(no_title_doc, accesses),
50+
],
51+
key=sortkey,
52+
)
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
"""
2+
Tests for Documents API endpoint in impress's core app: search
3+
"""
4+
5+
import pytest
6+
import responses
7+
from faker import Faker
8+
from rest_framework.test import APIClient
9+
10+
from core import factories, models
11+
12+
fake = Faker()
13+
pytestmark = pytest.mark.django_db
14+
15+
16+
@pytest.mark.parametrize("role", models.LinkRoleChoices.values)
@pytest.mark.parametrize("reach", models.LinkReachChoices.values)
@responses.activate
def test_api_documents_search_anonymous(reach, role, settings):
    """
    Anonymous users should get an empty result page from the search view,
    whatever the link reach and link role of the existing documents.
    """
    settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"

    # A single document is enough: the mocked Find response below has no hit.
    factories.DocumentFactory(link_reach=reach, link_role=role)

    # Find response
    responses.add(
        responses.POST,
        "http://find/api/v1.0/search",
        json=[],
        status=200,
    )

    response = APIClient().get("/api/v1.0/documents/search/", data={"q": "alpha"})

    assert response.status_code == 200
    assert response.json() == {
        "count": 0,
        "next": None,
        "previous": None,
        "results": [],
    }
46+
47+
48+
def test_api_documents_search_endpoint_is_none(settings):
    """A missing SEARCH_INDEXER_QUERY_URL setting should make the search view return an error."""
    settings.SEARCH_INDEXER_QUERY_URL = None

    user = factories.UserFactory()

    client = APIClient()
    client.force_login(user)

    # Send the request with the authenticated client: a fresh APIClient()
    # would perform it anonymously and leave the login above unused.
    response = client.get("/api/v1.0/documents/search/", data={"q": "alpha"})

    assert response.status_code == 401
    assert response.json() == {"detail": "The service is not configured properly."}
61+
62+
63+
@responses.activate
def test_api_documents_search_invalid_params(settings):
    """A search request without the `q` parameter should be rejected with a 400."""
    settings.SEARCH_INDEXER_QUERY_URL = "http://find/api/v1.0/search"

    user = factories.UserFactory()

    client = APIClient()
    client.force_login(user)

    # Send the request with the authenticated client: a fresh APIClient()
    # would perform it anonymously and leave the login above unused.
    response = client.get("/api/v1.0/documents/search/")

    assert response.status_code == 400
    assert response.json() == {"q": ["This field is required."]}
77+
78+
79+
@responses.activate
def test_api_documents_search_format(settings):
    """Check the payload returned by the search view for a document found by Find."""
    find_url = "http://find/api/v1.0/search"
    settings.SEARCH_INDEXER_QUERY_URL = find_url

    user = factories.UserFactory()

    client = APIClient()
    client.force_login(user)

    first_user, second_user, third_user = factories.UserFactory.create_batch(3)
    document = factories.DocumentFactory(
        title="alpha",
        users=(first_user, third_user),
        link_traces=(user, second_user),
    )
    access = factories.UserDocumentAccessFactory(document=document, user=user)

    # Find response: a single hit pointing to the document created above
    responses.add(
        responses.POST,
        find_url,
        json=[{"_id": str(document.pk)}],
        status=200,
    )
    response = client.get("/api/v1.0/documents/search/", data={"q": "alpha"})

    assert response.status_code == 200

    payload = response.json()
    pagination = {key: value for key, value in payload.items() if key != "results"}
    assert pagination == {
        "count": 1,
        "next": None,
        "previous": None,
    }

    expected_document = {
        "id": str(document.id),
        "abilities": document.get_abilities(user),
        "ancestors_link_reach": None,
        "ancestors_link_role": None,
        "computed_link_reach": document.computed_link_reach,
        "computed_link_role": document.computed_link_role,
        "created_at": document.created_at.isoformat().replace("+00:00", "Z"),
        "creator": str(document.creator.id),
        "depth": 1,
        "excerpt": document.excerpt,
        "link_reach": document.link_reach,
        "link_role": document.link_role,
        "nb_accesses_ancestors": 3,
        "nb_accesses_direct": 3,
        "numchild": 0,
        "path": document.path,
        "title": document.title,
        "updated_at": document.updated_at.isoformat().replace("+00:00", "Z"),
        "user_role": access.role,
    }
    found = payload["results"]
    assert len(found) == 1
    assert found[0] == expected_document

0 commit comments

Comments
 (0)