diff --git a/Makefile b/Makefile index 19c5155..5f46cbd 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,9 @@ coverage: ## Run tests with coverage python -m coverage erase - python -m coverage run -m pytest -ra + python -m coverage run -m pytest -ra -v python -m coverage report -m + python -m coverage html deps: ## Install dependencies pip install --upgrade pip @@ -21,7 +22,7 @@ lint: ## Lint python -m flake8 backend/src frontend/app.py tests test: ## Run tests - python -m pytest -ra + python -m pytest -ra -v build: make deps diff --git a/backend/src/executors.py b/backend/src/executors.py index 5db6203..84b7f59 100644 --- a/backend/src/executors.py +++ b/backend/src/executors.py @@ -72,7 +72,6 @@ def finetune(self, docs: DocumentArray, **kwargs) -> DocumentArray: train_data = [] for doc in docs: - matches = doc.matches for match in doc.matches: train_data.append( InputExample( diff --git a/frontend/app.py b/frontend/app.py index a6de81b..0c483aa 100644 --- a/frontend/app.py +++ b/frontend/app.py @@ -1,5 +1,6 @@ -import requests import json + +import requests import streamlit as st @@ -27,8 +28,8 @@ def search(abstract: str, host: str) -> dict: def finetune(doc: dict, match: dict, relevant: bool, host: str) -> dict: labeled = doc.copy() - labeled["tags"]["finetuner"] = {"label": 1 if relevant else -1} - match["tags"]["finetuner"] = {"label": 1 if relevant else -1} + labeled["tags"] = {"finetuner": {"label": 1 if relevant else -1}} + match["tags"] = {"finetuner": {"label": 1 if relevant else -1}} labeled["matches"] = [match] data = {"data": [labeled]} @@ -58,7 +59,6 @@ def match_score(match): if st.button(label="Search") or query: if query: doc = search(query, host) - max_score = max(match_score(match) for match in doc["matches"]) for match in doc["matches"]: @@ -78,14 +78,14 @@ def match_score(match): with col1: st.metric("Similarity (1 is best)", score, score_diff) with col2: - if st.button("✔️ Mark as relevant", id): + if st.button("✔️ Mark as relevant", id + "1"): with st.spinner("Finetuning..."): finetune(doc, match, True, host) st.success("Finetuned!") st.experimental_rerun() with col3: - if st.button("✖️ Mark as irrelevant", id): + if st.button("✖️ Mark as irrelevant", id + "0"): with st.spinner("Finetuning..."): finetune(doc, match, False, host) st.success("Finetuned!") diff --git a/tests/test_executors.py b/tests/test_executors.py index 3f39a50..b195920 100644 --- a/tests/test_executors.py +++ b/tests/test_executors.py @@ -23,17 +23,14 @@ def label_doc(docs): d = Document(text=papers[0]) matches = DocumentArray( - [Document(text=seq, tags={"finetuner": {"label": 1}}) for seq in papers[1:]] + [Document(text=seq, tags={"finetuner": {"label": 1.0}}) for seq in papers[1:]] ) - d.matches.extend(matches) - return d def encode_sequences(docs): embedded_docs = SpecterExecutor().encode(docs) - return embedded_docs @@ -55,3 +52,8 @@ def test_search(): results = flow.search(docs) return results + + +def test_finetuner(): + labelled_docs = DocumentArray(label_doc(docs)) + SpecterExecutor().finetune(labelled_docs)