-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathutils.py
More file actions
53 lines (45 loc) · 1.44 KB
/
utils.py
File metadata and controls
53 lines (45 loc) · 1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# utils.py
import os
import io
from cryptography.fernet import Fernet
from dotenv import load_dotenv
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Symmetric key used by the module-level Fernet instance below.
FERNET_KEY = os.environ.get("FERNET_KEY")
if not FERNET_KEY:
    # No key configured: create a fresh one and print it so it can be copied
    # into .env (only once). Until it is saved there, each process start
    # generates a different key.
    key = Fernet.generate_key()
    FERNET_KEY = key.decode()
    print("Generated FERNET_KEY:", FERNET_KEY)

# Shared cipher object used by encrypt_bytes / decrypt_bytes.
fernet = Fernet(FERNET_KEY.encode())
def encrypt_bytes(b: bytes) -> bytes:
    """Encrypt *b* with the module-level ``fernet`` instance.

    Args:
        b: Plaintext bytes to encrypt.

    Returns:
        The value returned by ``fernet.encrypt`` for *b*.
    """
    token = fernet.encrypt(b)
    return token
def decrypt_bytes(b: bytes) -> bytes:
    """Decrypt *b* with the module-level ``fernet`` instance.

    Nothing is caught here, so any exception raised by ``fernet.decrypt``
    propagates to the caller unchanged.

    Args:
        b: Bytes previously produced by ``encrypt_bytes``.

    Returns:
        The value returned by ``fernet.decrypt`` for *b*.
    """
    plaintext = fernet.decrypt(b)
    return plaintext
# Basic file parsers
from pdfminer.high_level import extract_text as pdf_extract_text
import docx
def parse_pdf(file_bytes: bytes) -> str:
    """Extract the text of an in-memory PDF, best-effort.

    The bytes are wrapped in a ``BytesIO`` and handed to pdfminer's
    ``extract_text``. Extraction failures never propagate: they are logged
    with their traceback and an empty string is returned, so callers keep
    the original "empty text on error" contract but the cause is no longer
    silently lost.

    Args:
        file_bytes: Raw contents of the PDF file.

    Returns:
        The extracted text, or ``""`` if extraction raised.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    with io.BytesIO(file_bytes) as stream:
        try:
            return pdf_extract_text(stream)
        except Exception:
            # Deliberately broad: any parser failure is treated as
            # "no text". Log it so the failure can be diagnosed.
            logging.getLogger(__name__).warning(
                "PDF text extraction failed; returning empty text",
                exc_info=True,
            )
            return ""
def parse_docx(file_bytes: bytes) -> str:
    """Return the paragraph text of an in-memory .docx file.

    Only ``doc.paragraphs`` is read; the text of each paragraph is joined
    with a newline. Errors raised while opening the document are not caught.

    Args:
        file_bytes: Raw contents of the .docx file.

    Returns:
        The paragraph texts joined by ``"\\n"``.
    """
    with io.BytesIO(file_bytes) as stream:
        document = docx.Document(stream)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)
def parse_txt(file_bytes: bytes) -> str:
    """Decode plain-text bytes as UTF-8, tolerating malformed input.

    A UTF-8 byte-order mark at the very start of the data is stripped so it
    does not leak into the result as a stray U+FEFF character. Byte
    sequences that are not valid UTF-8 are dropped rather than raising.

    Args:
        file_bytes: Raw contents of the text file.

    Returns:
        The decoded text (possibly empty).
    """
    # "utf-8-sig" behaves like UTF-8 but skips a leading BOM when present.
    return file_bytes.decode("utf-8-sig", errors="ignore")
def parse_document(filename: str, file_bytes: bytes) -> str:
    """Pick a parser from the file extension and return the document text.

    The extension match is case-insensitive. ``.pdf``, ``.docx`` and ``.txt``
    are routed to their dedicated parsers; anything else falls back to a
    lenient decode of the raw bytes.

    Args:
        filename: Name of the uploaded file; only its suffix is inspected.
        file_bytes: Raw contents of the file.

    Returns:
        The text produced by the selected parser or by the fallback decode.
    """
    lowered = filename.lower()
    if lowered.endswith(".pdf"):
        return parse_pdf(file_bytes)
    if lowered.endswith(".docx"):
        return parse_docx(file_bytes)
    if lowered.endswith(".txt"):
        return parse_txt(file_bytes)
    # Unknown extension: fall back to decoding the bytes, ignoring errors.
    return file_bytes.decode(errors='ignore')