diff --git a/web-app/django/VIM/apps/instruments/views/instrument_list.py b/web-app/django/VIM/apps/instruments/views/instrument_list.py index 3dce35b8..8fecd229 100644 --- a/web-app/django/VIM/apps/instruments/views/instrument_list.py +++ b/web-app/django/VIM/apps/instruments/views/instrument_list.py @@ -1,5 +1,6 @@ from typing import Union import logging +import re import pysolr import requests @@ -28,7 +29,7 @@ def count(self): # Helper classes to normalize Solr results class SolrInstrument: - def __init__(self, data: dict, lang_code: str = "en"): + def __init__(self, data: dict, lang_code: str = "en", highlight_dict: dict = None): sid = data.get("sid") self.pk = sid.replace("instrument-", "") if sid else "" self.umil_id = data.get("umil_id_s", "") @@ -38,6 +39,10 @@ def __init__(self, data: dict, lang_code: str = "en"): self.instrumentname_set = InstrumentNameSet( data.get(name_field, []), data.get(umil_label_name_field, None) ) + self.highlight_info: list[str] = [] + + if highlight_dict: + self.instrumentname_set.apply_highlights(highlight_dict) class ThumbnailStub: @@ -63,6 +68,23 @@ def __init__(self, names: Union[list[str], str], umil_label_name: str = None): def all(self) -> list[InstrumentNameStub]: return [InstrumentNameStub(name, self._umil_label_name) for name in self._names] + def apply_highlights(self, highlight_dict: dict): + """ + Apply Solr highlights to each name in the set. 
+ highlight_dict: dict[str, str] mapping original text -> highlighted text + """ + lookup = {k.lower(): v for k, v in highlight_dict.items() if k} + + # Sort longer terms first to prevent partial overlaps + terms = sorted(highlight_dict.keys(), key=len, reverse=True) + + # Regex for all terms + pattern = re.compile(r"\b(" + "|".join(map(re.escape, terms)) + r")\b") + + self._names = [ + pattern.sub(lambda m: f"{m.group(1)}", name) for name in self._names + ] + def get_display_names_str(self) -> str: sorted_names = sorted(self.all(), key=lambda x: not x.umil_label) name_list = [n.name for n in sorted_names] @@ -268,14 +290,38 @@ def _get_solr_page_results( **query_params, "rows": page_size, "start": start, + "hl": "true", + "hl.fl": "text", + "hl.simple.pre": "", + "hl.simple.post": "", + "hl.snippets": 1000, + "hl.fragsize": 1, } # Remove our custom params lang_code = solr_params.pop("lang_code") solr_response = solr.search(**solr_params) - instruments = [ - SolrInstrument(doc, lang_code=lang_code) for doc in solr_response.docs - ] + + # Extract highlight info + highlight_info = getattr(solr_response, "highlighting", {}) + + instruments = [] + for doc in solr_response.docs: + pk = doc.get("sid", "").replace("instrument-", "") + hl_snippets = highlight_info.get(f"instrument-{pk}", {}).get("text", []) + # Map original -> highlighted + highlight_map = {} + + for snippet in hl_snippets: + for term in re.findall(r"(.*?)", snippet): + highlight_map[term] = f"{term}" + + inst = SolrInstrument( + doc, lang_code=lang_code, highlight_dict=highlight_map + ) + inst.highlight_info = hl_snippets + instruments.append(inst) + total_count = solr_response.hits # pysolr's hits corresponds to Solr's numFound # Return facet data if available diff --git a/web-app/django/VIM/templates/instruments/includes/masonryView.html b/web-app/django/VIM/templates/instruments/includes/masonryView.html index 2e5dd295..ff49f092 100644 --- 
a/web-app/django/VIM/templates/instruments/includes/masonryView.html +++ b/web-app/django/VIM/templates/instruments/includes/masonryView.html @@ -14,7 +14,7 @@ onerror="this.onerror=null;this.src='{% static "assets/images/instruments/no-image.svg" %}';" />
- {{ instrument.instrumentname_set.get_display_names_str }} + {{ instrument.instrumentname_set.get_display_names_str|safe }}
- {{ instrument.instrumentname_set.get_display_names_str }} + {{ instrument.instrumentname_set.get_display_names_str|safe }}