forked from endee-io/endee
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfix.py
More file actions
19 lines (19 loc) · 873 Bytes
/
fix.py
File metadata and controls
19 lines (19 loc) · 873 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from endee import Endee, Precision
from sentence_transformers import SentenceTransformer
import json
from pathlib import Path
# Rebuild the 'industrial_docs' Endee index from a local JSON document dump.
#
# Every document is embedded with a sentence-transformers model and uploaded
# as one vector; an index left over from a previous run is replaced.

BASE_URL = 'http://localhost:8090/api/v1'
INDEX_NAME = 'industrial_docs'
DOCS_PATH = Path('F:/endee/documents.json')
MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'


def build_items(docs, vectors):
    """Pair each document with its embedding in the shape passed to upsert.

    Args:
        docs: Sequence of dicts with 'id', 'title', 'category' and
            'content' keys.
        vectors: One embedding per document, in the same order as ``docs``.

    Returns:
        A list with one ``{'id', 'vector', 'meta'}`` dict per document.

    Raises:
        ValueError: If ``docs`` and ``vectors`` differ in length.
        KeyError: If a document lacks one of the required keys.
    """
    # zip() would silently drop the unmatched tail, so check up front.
    if len(docs) != len(vectors):
        raise ValueError(
            'got {} documents but {} vectors'.format(len(docs), len(vectors))
        )
    return [
        {
            'id': doc['id'],
            'vector': vector,
            'meta': {
                'title': doc['title'],
                'category': doc['category'],
                'content': doc['content'],
            },
        }
        for doc, vector in zip(docs, vectors)
    ]


def main():
    """Recreate the index and upload one embedded vector per document."""
    # Load, embed and validate everything first: if the file is missing or a
    # document is malformed, we fail before the existing index is dropped.
    docs = json.loads(DOCS_PATH.read_text(encoding='utf-8'))
    if not docs:
        raise SystemExit(
            'No documents found in {}; index left untouched.'.format(DOCS_PATH)
        )

    model = SentenceTransformer(MODEL_NAME)
    texts = [doc['title'] + '. ' + doc['content'] for doc in docs]
    vectors = model.encode(texts, normalize_embeddings=True).tolist()
    items = build_items(docs, vectors)

    client = Endee()
    client.set_base_url(BASE_URL)

    # Dropping the old index is best-effort. A failure here is presumably
    # "index does not exist yet" (TODO confirm the SDK's exception type), so
    # report it and carry on instead of silently swallowing everything.
    try:
        client.delete_index(INDEX_NAME)
    except Exception as exc:
        print('Old index not deleted:', exc)
    else:
        print('Deleted old index')

    # The dimension is taken from the embeddings themselves so it cannot
    # drift from the model (the script previously hard-coded 384).
    client.create_index(
        name=INDEX_NAME,
        dimension=len(vectors[0]),
        space_type='cosine',
    )
    index = client.get_index(INDEX_NAME)
    index.upsert(items)
    print('Done! Indexed', len(items), 'documents!')


if __name__ == '__main__':
    main()