Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ env/

# Test / coverage
.pytest_cache/
backend/pytest-unit-results.xml
coverage.xml
htmlcov/

Expand Down
3 changes: 2 additions & 1 deletion backend/app/adapters/repos/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@ async def list_gt_paginated(
tags: list[str] | None = None,
exclude_tags: list[str] | None = None,
item_id: str | None = None,
ref_url: str | None = None,
plugin_filters: dict[str, str] | None = None,
keyword: str | None = None,
sort_by: SortField | None = None,
plugin_sort: str | None = None,
sort_order: SortOrder | None = None,
page: int = 1,
limit: int = 25,
Expand Down
212 changes: 77 additions & 135 deletions backend/app/adapters/repos/cosmos_repo.py

Large diffs are not rendered by default.

75 changes: 45 additions & 30 deletions backend/app/adapters/repos/memory_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Iterable
from uuid import UUID

from app.domain.conversation_fields import answer_text_from_item, question_text_from_item
from app.domain.enums import GroundTruthStatus, SortField, SortOrder
from app.domain.models import (
AgenticGroundTruthEntry,
Expand All @@ -15,6 +16,8 @@
PaginationMetadata,
Stats,
)
from app.plugins.base import PluginPackRegistry
from app.plugins.pack_registry import get_default_pack_registry

ZERO_UUID = UUID("00000000-0000-0000-0000-000000000000")

Expand All @@ -25,12 +28,14 @@ def __init__(
*,
items: list[AgenticGroundTruthEntry] | None = None,
curation_instructions: list[DatasetCurationInstructions] | None = None,
plugin_pack_registry: PluginPackRegistry | None = None,
) -> None:
self.items: dict[str, AgenticGroundTruthEntry] = {}
self._locations: dict[tuple[str, UUID, str], str] = {}
self._assignment_docs: dict[tuple[str, str], AssignmentDocument] = {}
self._curation: dict[str, DatasetCurationInstructions] = {}
self._etag_version = 0
self._plugin_pack_registry = plugin_pack_registry or get_default_pack_registry()

for item in items or []:
self._store_initial_item(item)
Expand All @@ -45,7 +50,9 @@ def _next_etag(self) -> str:
return f"memory-etag-{self._etag_version}"

def _clone_item(self, item: AgenticGroundTruthEntry) -> AgenticGroundTruthEntry:
return AgenticGroundTruthEntry.model_validate(item.model_dump(by_alias=True))
return AgenticGroundTruthEntry.model_validate(
item.model_dump(by_alias=True, exclude={"tags"})
)

def _clone_instruction(self, doc: DatasetCurationInstructions) -> DatasetCurationInstructions:
return DatasetCurationInstructions.model_validate(doc.model_dump(by_alias=True))
Expand Down Expand Up @@ -104,28 +111,30 @@ def _matches_location(
)

def _collect_urls(self, item: AgenticGroundTruthEntry) -> Iterable[str]:
for ref in item.refs:
yield ref.url
for turn in item.history or []:
for ref in getattr(turn, "refs", None) or []:
yield ref.url
for doc in self._plugin_pack_registry.collect_search_documents(item):
url = doc.get("url")
if isinstance(url, str) and url:
yield url

def _collect_text(self, item: AgenticGroundTruthEntry) -> str:
parts = [
item.id,
item.datasetName,
item.synth_question or "",
item.edited_question or "",
item.answer or "",
question_text_from_item(item),
answer_text_from_item(item),
item.comment or "",
]
for turn in item.history or []:
parts.append(turn.msg)
for ref in item.refs:
parts.extend([ref.title or "", ref.url, ref.content or "", ref.keyExcerpt or ""])
for turn in item.history or []:
for ref in getattr(turn, "refs", None) or []:
parts.extend([ref.title or "", ref.url, ref.content or "", ref.keyExcerpt or ""])
for doc in self._plugin_pack_registry.collect_search_documents(item):
parts.extend(
[
str(doc.get("id") or ""),
str(doc.get("title") or ""),
str(doc.get("url") or ""),
str(doc.get("chunk") or ""),
]
)
return " ".join(parts).lower()

def _is_unassigned_candidate(self, item: AgenticGroundTruthEntry) -> bool:
Expand All @@ -138,24 +147,27 @@ def _sort_items(
self,
items: list[AgenticGroundTruthEntry],
sort_by: SortField | None,
plugin_sort: str | None,
sort_order: SortOrder | None,
) -> list[AgenticGroundTruthEntry]:
field = sort_by or SortField.reviewed_at
reverse = (sort_order or SortOrder.desc) == SortOrder.desc

def key(item: AgenticGroundTruthEntry):
if plugin_sort:
plugin_value = self._plugin_pack_registry.plugin_sort_value(item, plugin_sort)
return (
plugin_value if plugin_value is not None else -1,
item.updated_at or datetime.min.replace(tzinfo=timezone.utc),
item.id,
)
if field == SortField.updated_at:
return item.updated_at or datetime.min.replace(tzinfo=timezone.utc)
if field == SortField.id:
return item.id
if field == SortField.has_answer:
return (
1 if (item.answer or "").strip() else 0,
item.updated_at or datetime.min.replace(tzinfo=timezone.utc),
)
if field == SortField.totalReferences:
return (
item.totalReferences,
1 if answer_text_from_item(item) else 0,
item.updated_at or datetime.min.replace(tzinfo=timezone.utc),
)
if field == SortField.tag_count:
Expand Down Expand Up @@ -194,7 +206,7 @@ async def list_gt_by_dataset(
items = [item for item in items if item.status == status]
return [
self._clone_item(item)
for item in self._sort_items(items, SortField.updated_at, SortOrder.desc)
for item in self._sort_items(items, SortField.updated_at, None, SortOrder.desc)
]

async def list_all_gt(
Expand All @@ -205,7 +217,7 @@ async def list_all_gt(
items = [item for item in items if item.status == status]
return [
self._clone_item(item)
for item in self._sort_items(items, SortField.updated_at, SortOrder.desc)
for item in self._sort_items(items, SortField.updated_at, None, SortOrder.desc)
]

async def list_gt_paginated(
Expand All @@ -215,9 +227,10 @@ async def list_gt_paginated(
tags: list[str] | None = None,
exclude_tags: list[str] | None = None,
item_id: str | None = None,
ref_url: str | None = None,
plugin_filters: dict[str, str] | None = None,
keyword: str | None = None,
sort_by: SortField | None = None,
plugin_sort: str | None = None,
sort_order: SortOrder | None = None,
page: int = 1,
limit: int = 25,
Expand All @@ -235,15 +248,17 @@ async def list_gt_paginated(
filtered = [item for item in filtered if not banned.intersection(set(item.tags))]
if item_id:
filtered = [item for item in filtered if item_id in item.id]
if ref_url:
if plugin_filters:
filtered = [
item for item in filtered if any(ref_url in url for url in self._collect_urls(item))
item
for item in filtered
if self._plugin_pack_registry.matches_query_filters(item, plugin_filters)
]
if keyword:
lowered = keyword.lower()
filtered = [item for item in filtered if lowered in self._collect_text(item)]

sorted_items = self._sort_items(filtered, sort_by, sort_order)
sorted_items = self._sort_items(filtered, sort_by, plugin_sort, sort_order)
total = len(sorted_items)
start = (page - 1) * limit
end = start + limit
Expand Down Expand Up @@ -325,7 +340,7 @@ async def list_unassigned(self, limit: int) -> list[AgenticGroundTruthEntry]:
items = [item for item in self.items.values() if self._is_unassigned_candidate(item)]
return [
self._clone_item(item)
for item in self._sort_items(items, SortField.updated_at, SortOrder.desc)[:limit]
for item in self._sort_items(items, SortField.updated_at, None, SortOrder.desc)[:limit]
]

async def sample_unassigned(
Expand All @@ -346,7 +361,7 @@ async def query_unassigned_by_dataset_prefix(
]
return [
self._clone_item(item)
for item in self._sort_items(items, SortField.updated_at, SortOrder.desc)[:take]
for item in self._sort_items(items, SortField.updated_at, None, SortOrder.desc)[:take]
]

async def query_unassigned_global(
Expand All @@ -360,7 +375,7 @@ async def query_unassigned_global(
]
return [
self._clone_item(item)
for item in self._sort_items(items, SortField.updated_at, SortOrder.desc)[:take]
for item in self._sort_items(items, SortField.updated_at, None, SortOrder.desc)[:take]
]

async def assign_to(self, item_id: str, user_id: str) -> bool:
Expand Down Expand Up @@ -399,7 +414,7 @@ async def list_assigned(self, user_id: str) -> list[AgenticGroundTruthEntry]:
]
return [
self._clone_item(item)
for item in self._sort_items(items, SortField.updated_at, SortOrder.desc)
for item in self._sort_items(items, SortField.updated_at, None, SortOrder.desc)
]

async def upsert_assignment_doc(
Expand Down
37 changes: 23 additions & 14 deletions backend/app/adapters/search/demo_search.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
from __future__ import annotations

from app.domain.models import AgenticGroundTruthEntry
from app.plugins.base import PluginPackRegistry
from app.plugins.pack_registry import get_default_pack_registry


class DemoSearchAdapter:
def __init__(self, items: list[AgenticGroundTruthEntry]) -> None:
def __init__(
self,
items: list[AgenticGroundTruthEntry],
plugin_pack_registry: PluginPackRegistry | None = None,
) -> None:
self._items = items
self._plugin_pack_registry = plugin_pack_registry or get_default_pack_registry()

async def query(self, q: str, top: int = 5) -> list[dict[str, object]]:
query = q.strip().lower()
Expand All @@ -15,30 +22,32 @@ async def query(self, q: str, top: int = 5) -> list[dict[str, object]]:
matches: list[dict[str, object]] = []
seen_urls: set[str] = set()
for item in self._items:
refs = list(item.refs)
for turn in item.history or []:
refs.extend(getattr(turn, "refs", None) or [])
for ref in refs:
for ref in self._plugin_pack_registry.collect_search_documents(item):
doc_id = ref.get("id")
url = ref.get("url")
if not isinstance(url, str) or not url:
continue
haystack = " ".join(
[
ref.url,
ref.title or "",
ref.content or "",
ref.keyExcerpt or "",
str(doc_id or ""),
url,
str(ref.get("title") or ""),
str(ref.get("chunk") or ""),
item.datasetName,
item.id,
]
).lower()
if query not in haystack:
continue
if ref.url in seen_urls:
if url in seen_urls:
continue
seen_urls.add(ref.url)
seen_urls.add(url)
matches.append(
{
"url": ref.url,
"title": ref.title,
"chunk": ref.content or ref.keyExcerpt or f"Reference for {item.id}",
"id": doc_id,
"url": url,
"title": ref.get("title"),
"chunk": ref.get("chunk") or f"Reference for {item.id}",
}
)
if len(matches) >= top:
Expand Down
3 changes: 0 additions & 3 deletions backend/app/api/v1/assignments.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
ETagRequiredError,
apply_shared_update,
persist_shared_update,
read_legacy_compat_update,
)
from app.services.validation_service import (
ApprovalValidationError,
Expand Down Expand Up @@ -137,7 +136,6 @@ async def update_item(
original_assigned_to = it.assignedTo

provided_fields: Set[str] = set(payload.model_fields_set)
payload_extras = payload.model_extra or {}
try:
mutation = apply_shared_update(
it,
Expand All @@ -157,7 +155,6 @@ async def update_item(
status=payload.status,
approve=bool(payload.approve),
actor_user_id=user.user_id,
legacy_update=read_legacy_compat_update(payload_extras),
clear_assignment_on_statuses={
GroundTruthStatus.approved,
GroundTruthStatus.deleted,
Expand Down
Loading
Loading