Skip to content

Commit 2c07a42

Browse files
committed
✨ Import of documents
Import of (markdown) documents. See #1567 #1569.
1 parent 3ab01c9 commit 2c07a42

File tree

14 files changed

+385
-109
lines changed

14 files changed

+385
-109
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ logs: ## display app-dev logs (follow mode)
213213
.PHONY: logs
214214

215215
run-backend: ## Start only the backend application and all needed services
216+
@$(COMPOSE) up --force-recreate -d docspec
216217
@$(COMPOSE) up --force-recreate -d celery-dev
217218
@$(COMPOSE) up --force-recreate -d y-provider-development
218219
@$(COMPOSE) up --force-recreate -d nginx

compose.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,3 +217,8 @@ services:
217217
kc_postgresql:
218218
condition: service_healthy
219219
restart: true
220+
221+
docspec:
222+
image: ghcr.io/docspecio/api:2.0.0
223+
ports:
224+
- "4000:4000"

docs/env.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ These are the environment variables you can set for the `impress-backend` contai
103103
| USER_OIDC_ESSENTIAL_CLAIMS | Essential claims in OIDC token | [] |
104104
| Y_PROVIDER_API_BASE_URL | Y Provider url | |
105105
| Y_PROVIDER_API_KEY | Y provider API key | |
106+
| DOCSPEC_API_URL | URL to endpoint of DocSpec conversion API | |
106107

107108

108109
## impress-frontend image

env.d/development/common

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,5 +67,7 @@ DJANGO_SERVER_TO_SERVER_API_TOKENS=server-api-token
6767
Y_PROVIDER_API_BASE_URL=http://y-provider-development:4444/api/
6868
Y_PROVIDER_API_KEY=yprovider-api-key
6969

70+
DOCSPEC_API_URL=http://docspec:4000/conversion
71+
7072
# Theme customization
7173
THEME_CUSTOMIZATION_CACHE_TIMEOUT=15

env.d/development/common.e2e

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ BURST_THROTTLE_RATES="200/minute"
33
COLLABORATION_API_URL=http://y-provider:4444/collaboration/api/
44
SUSTAINED_THROTTLE_RATES="200/hour"
55
Y_PROVIDER_API_BASE_URL=http://y-provider:4444/api/
6+
DOCSPEC_API_URL=http://docspec:4000/conversion
67

78
# Throttle
89
API_DOCUMENT_THROTTLE_RATE=1000/min

src/backend/core/api/serializers.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,15 @@
1010
from django.utils.text import slugify
1111
from django.utils.translation import gettext_lazy as _
1212

13+
from core.services import mime_types
1314
import magic
1415
from rest_framework import serializers
1516

1617
from core import choices, enums, models, utils, validators
1718
from core.services.ai_services import AI_ACTIONS
1819
from core.services.converter_services import (
1920
ConversionError,
20-
YdocConverter,
21+
Converter,
2122
)
2223

2324

@@ -187,6 +188,7 @@ class DocumentSerializer(ListDocumentSerializer):
187188

188189
content = serializers.CharField(required=False)
189190
websocket = serializers.BooleanField(required=False, write_only=True)
191+
file = serializers.FileField(required=False, write_only=True, allow_null=True)
190192

191193
class Meta:
192194
model = models.Document
@@ -203,6 +205,7 @@ class Meta:
203205
"deleted_at",
204206
"depth",
205207
"excerpt",
208+
"file",
206209
"is_favorite",
207210
"link_role",
208211
"link_reach",
@@ -460,7 +463,11 @@ def create(self, validated_data):
460463
language = user.language or language
461464

462465
try:
463-
document_content = YdocConverter().convert(validated_data["content"])
466+
document_content = Converter().convert(
467+
validated_data["content"],
468+
mime_types.MARKDOWN,
469+
mime_types.YJS
470+
)
464471
except ConversionError as err:
465472
raise serializers.ValidationError(
466473
{"content": ["Could not convert content"]}

src/backend/core/api/viewsets.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,12 @@
3939
from core.services.ai_services import AIService
4040
from core.services.collaboration_services import CollaborationService
4141
from core.services.converter_services import (
42+
ConversionError,
4243
ServiceUnavailableError as YProviderServiceUnavailableError,
43-
)
44-
from core.services.converter_services import (
4544
ValidationError as YProviderValidationError,
45+
Converter,
4646
)
47-
from core.services.converter_services import (
48-
YdocConverter,
49-
)
47+
from core.services import mime_types
5048
from core.tasks.mail import send_ask_for_access_mail
5149
from core.utils import extract_attachments, filter_descendants
5250

@@ -503,6 +501,27 @@ def perform_create(self, serializer):
503501
"IN SHARE ROW EXCLUSIVE MODE;"
504502
)
505503

504+
# Remove file from validated_data as it's not a model field
505+
# Process it if present
506+
uploaded_file = serializer.validated_data.pop("file", None)
507+
508+
# If a file is uploaded, convert it to Yjs format and set as content
509+
if uploaded_file:
510+
try:
511+
file_content = uploaded_file.read()
512+
513+
converter = Converter()
514+
converted_content = converter.convert(
515+
file_content,
516+
content_type=uploaded_file.content_type,
517+
accept=mime_types.YJS
518+
)
519+
serializer.validated_data["content"] = converted_content
520+
except ConversionError as err:
521+
raise drf.exceptions.ValidationError(
522+
{"file": ["Could not convert file content"]}
523+
) from err
524+
506525
obj = models.Document.add_root(
507526
creator=self.request.user,
508527
**serializer.validated_data,
@@ -1602,14 +1621,14 @@ def content(self, request, pk=None):
16021621
if base64_content is not None:
16031622
# Convert using the y-provider service
16041623
try:
1605-
yprovider = YdocConverter()
1624+
yprovider = Converter()
16061625
result = yprovider.convert(
16071626
base64.b64decode(base64_content),
1608-
"application/vnd.yjs.doc",
1627+
mime_types.YJS,
16091628
{
1610-
"markdown": "text/markdown",
1611-
"html": "text/html",
1612-
"json": "application/json",
1629+
"markdown": mime_types.MARKDOWN,
1630+
"html": mime_types.HTML,
1631+
"json": mime_types.JSON,
16131632
}[content_format],
16141633
)
16151634
content = result

src/backend/core/services/converter_services.py

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
from django.conf import settings
66

77
import requests
8+
import typing
89

10+
from core.services import mime_types
911

1012
class ConversionError(Exception):
1113
"""Base exception for conversion-related errors."""
@@ -19,8 +21,65 @@ class ServiceUnavailableError(ConversionError):
1921
"""Raised when the conversion service is unavailable."""
2022

2123

24+
class ConverterProtocol(typing.Protocol):
    """Structural interface shared by the conversion backends.

    Any object exposing a ``convert(text, content_type, accept)`` method
    satisfies this protocol; no explicit inheritance is required.
    """

    def convert(self, text, content_type, accept):
        """Convert ``text`` from ``content_type`` into the ``accept`` format."""
        ...
26+
27+
28+
class Converter:
    """Entry point that routes a conversion request to the right backend.

    A DOCX -> Yjs request is served in two hops: the DocSpec backend turns
    the DOCX into BlockNote, and that result is then converted to Yjs.
    Every other request is delegated unchanged to the Ydoc backend.
    """

    # Backend used for the DOCX -> BlockNote hop.
    docspec: ConverterProtocol
    # Backend used for every other conversion.
    ydoc: ConverterProtocol

    def __init__(self):
        self.docspec = DocSpecConverter()
        self.ydoc = YdocConverter()

    def convert(self, input, content_type, accept):
        """Convert ``input`` from ``content_type`` into the ``accept`` format.

        NOTE: ``input`` shadows the builtin of the same name; the parameter
        name is kept because it is part of the signature callers see.
        """
        docx_to_yjs = content_type == mime_types.DOCX and accept == mime_types.YJS
        if not docx_to_yjs:
            return self.ydoc.convert(input, content_type, accept)

        # Two-step pipeline: DOCX -> BlockNote (DocSpec), then BlockNote -> Yjs.
        blocknote = self.docspec.convert(
            input, mime_types.DOCX, mime_types.BLOCKNOTE
        )
        return self.convert(blocknote, mime_types.BLOCKNOTE, mime_types.YJS)
47+
48+
49+
class DocSpecConverter:
    """Client for the DocSpec conversion API (DOCX to BlockNote only)."""

    def _request(self, url, data, content_type):
        """POST ``data`` to ``url`` as a file upload and return the response.

        The payload is sent under the ``file`` form field with the fixed
        filename ``document.docx``, and the request asks for BlockNote output
        through the ``Accept`` header. Timeout and TLS verification come from
        the ``CONVERSION_API_TIMEOUT`` and ``CONVERSION_API_SECURE`` settings.
        An HTTP error status raises via ``raise_for_status()``.
        """
        upload = {"file": ("document.docx", data, content_type)}
        response = requests.post(
            url,
            headers={"Accept": mime_types.BLOCKNOTE},
            files=upload,
            timeout=settings.CONVERSION_API_TIMEOUT,
            verify=settings.CONVERSION_API_SECURE,
        )
        response.raise_for_status()
        return response

    def convert(self, data, content_type, accept):
        """Convert a DOCX payload to BlockNote and return the response body.

        Raises:
            ValidationError: if ``data`` is empty, or if the requested
                conversion is anything other than DOCX to BlockNote.
            ServiceUnavailableError: if the request to the DocSpec API fails
                with a ``requests.RequestException``.
        """
        if not data:
            raise ValidationError("Input data cannot be empty")

        supported = (
            content_type == mime_types.DOCX and accept == mime_types.BLOCKNOTE
        )
        if not supported:
            raise ValidationError(
                f"Conversion from {content_type} to {accept} is not supported."
            )

        try:
            response = self._request(settings.DOCSPEC_API_URL, data, content_type)
            return response.content
        except requests.RequestException as err:
            raise ServiceUnavailableError(
                "Failed to connect to DocSpec conversion service",
            ) from err
79+
80+
2281
class YdocConverter:
23-
"""Service class for conversion-related operations."""
82+
"""Service class for YDoc conversion-related operations."""
2483

2584
@property
2685
def auth_header(self):
@@ -45,7 +104,7 @@ def _request(self, url, data, content_type, accept):
45104
return response
46105

47106
def convert(
48-
self, text, content_type="text/markdown", accept="application/vnd.yjs.doc"
107+
self, text, content_type=mime_types.MARKDOWN, accept=mime_types.YJS
49108
):
50109
"""Convert a Markdown text into our internal format using an external microservice."""
51110

@@ -59,14 +118,14 @@ def convert(
59118
content_type,
60119
accept,
61120
)
62-
if accept == "application/vnd.yjs.doc":
121+
if accept == mime_types.YJS:
63122
return b64encode(response.content).decode("utf-8")
64-
if accept in {"text/markdown", "text/html"}:
123+
if accept in {mime_types.MARKDOWN, "text/html"}:
65124
return response.text
66-
if accept == "application/json":
125+
if accept == mime_types.JSON:
67126
return response.json()
68127
raise ValidationError("Unsupported format")
69128
except requests.RequestException as err:
70129
raise ServiceUnavailableError(
71-
"Failed to connect to conversion service",
130+
f"Failed to connect to YDoc conversion service {content_type}, {accept}",
72131
) from err

src/backend/core/services/mime_types.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
BLOCKNOTE = "application/vnd.blocknote+json"
2+
YJS = "application/vnd.yjs.doc"
3+
MARKDOWN = "text/markdown"
4+
JSON = "application/json"
5+
DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
6+
HTML = "text/html"

src/backend/impress/settings.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,12 @@ class Base(Configuration):
680680
environ_prefix=None,
681681
)
682682

683+
# DocSpec API microservice
684+
DOCSPEC_API_URL = values.Value(
685+
environ_name="DOCSPEC_API_URL",
686+
environ_prefix=None
687+
)
688+
683689
# Conversion endpoint
684690
CONVERSION_API_ENDPOINT = values.Value(
685691
default="convert",

0 commit comments

Comments
 (0)