Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@

coverage: ## Run tests with coverage
python -m coverage erase
python -m coverage run -m pytest -ra
python -m coverage run -m pytest -ra -v
python -m coverage report -m
python -m coverage html

deps: ## Install dependencies
pip install --upgrade pip
Expand All @@ -21,7 +22,7 @@ lint: ## Lint
python -m flake8 backend/src frontend/app.py tests

test: ## Run tests
python -m pytest -ra
python -m pytest -ra -v

build:
make deps
Expand Down
1 change: 0 additions & 1 deletion backend/src/executors.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def finetune(self, docs: DocumentArray, **kwargs) -> DocumentArray:
train_data = []

for doc in docs:
matches = doc.matches
for match in doc.matches:
train_data.append(
InputExample(
Expand Down
12 changes: 6 additions & 6 deletions frontend/app.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import requests
import json

import requests
import streamlit as st


Expand Down Expand Up @@ -27,8 +28,8 @@ def search(abstract: str, host: str) -> dict:
def finetune(doc: dict, match: dict, relevant: bool, host: str) -> dict:
labeled = doc.copy()

labeled["tags"]["finetuner"] = {"label": 1 if relevant else -1}
match["tags"]["finetuner"] = {"label": 1 if relevant else -1}
labeled["tags"] = {"finetuner": {"label": 1 if relevant else -1}}
match["tags"] = {"finetuner": {"label": 1 if relevant else -1}}
labeled["matches"] = [match]
data = {"data": [labeled]}

Expand Down Expand Up @@ -58,7 +59,6 @@ def match_score(match):
if st.button(label="Search") or query:
if query:
doc = search(query, host)

max_score = max(match_score(match) for match in doc["matches"])

for match in doc["matches"]:
Expand All @@ -78,14 +78,14 @@ def match_score(match):
with col1:
st.metric("Similarity (1 is best)", score, score_diff)
with col2:
if st.button("✔️ Mark as relevant", id):
if st.button("✔️ Mark as relevant", id + "1"):
with st.spinner("Finetuning..."):
finetune(doc, match, True, host)
st.success("Finetuned!")
st.experimental_rerun()

with col3:
if st.button("✖️ Mark as irrelevant", id):
if st.button("✖️ Mark as irrelevant", id + "0"):
with st.spinner("Finetuning..."):
finetune(doc, match, False, host)
st.success("Finetuned!")
Expand Down
10 changes: 6 additions & 4 deletions tests/test_executors.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,14 @@ def label_doc(docs):
d = Document(text=papers[0])

matches = DocumentArray(
[Document(text=seq, tags={"finetuner": {"label": 1}}) for seq in papers[1:]]
[Document(text=seq, tags={"finetuner": {"label": 1.0}}) for seq in papers[1:]]
)

d.matches.extend(matches)

return d


def encode_sequences(docs):
embedded_docs = SpecterExecutor().encode(docs)

return embedded_docs


Expand All @@ -55,3 +52,8 @@ def test_search():
results = flow.search(docs)

return results


def test_finetuner():
labelled_docs = DocumentArray(label_doc(docs))
SpecterExecutor().finetune(labelled_docs)