From 7b0314a54bcd9b60f69f2bd16670f106f2ac29ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olov=20Ylinenp=C3=A4=C3=A4?= Date: Tue, 14 Apr 2026 18:17:47 +0200 Subject: [PATCH] perf(search): Add settings for _source.excludes - Add settings for ES _source.excludes - Filter out unused Item props -> cuts search response size in ~half - Less JSON to parse and mangle in both BE+FE TODO: - filter internal here, e.g. _links, _str, _topStr with this - rename+refactor libris_search_boost.json --- .../elasticsearch/libris_search_boost.json | 22 ++++++++++++++ .../main/groovy/whelk/search2/ESSettings.java | 29 ++++++++++++++----- .../src/main/groovy/whelk/search2/Query.java | 5 ++++ 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/librisxl-tools/elasticsearch/libris_search_boost.json b/librisxl-tools/elasticsearch/libris_search_boost.json index d64977c445..7c0bad6dd8 100644 --- a/librisxl-tools/elasticsearch/libris_search_boost.json +++ b/librisxl-tools/elasticsearch/libris_search_boost.json @@ -120,5 +120,27 @@ "value": "VirtualRecord", "score": 1000 } + ], + "source_excludes": [ + "@reverse.instanceOf.@reverse.itemOf.hasComponent", + "@reverse.instanceOf.@reverse.itemOf.itemOf", + "@reverse.instanceOf.@reverse.itemOf.librissearch:itemNote", + "@reverse.instanceOf.@reverse.itemOf.librissearch:shelf", + "@reverse.instanceOf.@reverse.itemOf.meta", + "@reverse.instanceOf.@reverse.itemOf.sameAs", + "@reverse.instanceOf.@reverse.itemOf.shelfControlNumber", + "@reverse.instanceOf.@reverse.itemOf.shelfMark", + "@reverse.instanceOf.@reverse.itemOf.subject", + "@reverse.instanceOf.@reverse.itemOf.summary", + "@reverse.itemOf.hasComponent", + "@reverse.itemOf.itemOf", + "@reverse.itemOf.librissearch:itemNote", + "@reverse.itemOf.librissearch:shelf", + "@reverse.itemOf.meta", + "@reverse.itemOf.sameAs", + "@reverse.itemOf.shelfControlNumber", + "@reverse.itemOf.shelfMark", + "@reverse.itemOf.subject", + "@reverse.itemOf.summary" ] } \ No newline at end of file diff --git a/whelk-core/src/main/groovy/whelk/search2/ESSettings.java b/whelk-core/src/main/groovy/whelk/search2/ESSettings.java index f33e3964c9..3575e838dd 100644 --- a/whelk-core/src/main/groovy/whelk/search2/ESSettings.java +++ b/whelk-core/src/main/groovy/whelk/search2/ESSettings.java @@ -4,6 +4,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -19,6 +20,7 @@ public class ESSettings { private EsMappings mappings; private final Boost boost; + private final List sourceExcludes; private int maxItems; @@ -28,6 +30,7 @@ public ESSettings(Whelk whelk) { this.maxItems = whelk.elastic.maxResultWindow; } this.boost = loadBoostSettings(); + this.sourceExcludes = loadSourceExcludesSettings(); } // For test only @@ -43,6 +46,7 @@ private ESSettings(EsMappings mappings, Boost boost, int maxItems) { this.mappings = mappings; this.boost = boost; this.maxItems = maxItems; + this.sourceExcludes = Collections.emptyList(); } public boolean isConfigured() { @@ -57,6 +61,10 @@ public Boost boost() { return boost; } + public List sourceExcludes() { + return sourceExcludes; + } + public int maxItems() { return maxItems; } @@ -66,6 +74,11 @@ public Boost loadBoostSettings() { return new Boost(settings); } + private List loadSourceExcludesSettings() { + Map settings = toMap(Boost.class.getClassLoader().getResourceAsStream(BOOST_SETTINGS_FILE)); + return getAsStream(settings, "source_excludes").map(String.class::cast).toList(); + } + public static Boost loadBoostSettings(String json) { return new Boost(toMap(json)); } @@ -226,10 +239,6 @@ private static ConstantScore load(Map settings) { } } - private static Stream getAsStream(Map m, String k) { - return getOrDefault(m, k, List.of()).stream(); - } - private static Map getAsMap(Map m, String k) { return getOrDefault(m, k, Map.of()); } @@ -237,11 +246,15 @@ private static Map getAsMap(Map m, String k) { private static float getAsFloat(Map m, String k) { return ((Number) m.get(k)).floatValue(); } + } - @SuppressWarnings("unchecked") - private static T getOrDefault(Map m, String k, T defaultTo) { - return m.containsKey(k) ? (T) m.get(k) : defaultTo; - } + private static Stream getAsStream(Map m, String k) { + return getOrDefault(m, k, List.of()).stream(); + } + + @SuppressWarnings("unchecked") + private static T getOrDefault(Map m, String k, T defaultTo) { + return m.containsKey(k) ? (T) m.get(k) : defaultTo; } private static Map toMap(Object json) { diff --git a/whelk-core/src/main/groovy/whelk/search2/Query.java b/whelk-core/src/main/groovy/whelk/search2/Query.java index 2cecf460da..604335bc60 100644 --- a/whelk-core/src/main/groovy/whelk/search2/Query.java +++ b/whelk-core/src/main/groovy/whelk/search2/Query.java @@ -143,6 +143,7 @@ protected EsQuery doGetEsQuery() { EsQueryTree esQueryTree = new EsQueryTree(expandedQueryTree, currentEsSettings, getSelectedFacets()); var esQueryDsl = buildEsQueryDsl(esQueryTree.getMainQuery(), esQueryTree.getPostFilter()); esQueryDsl.put("aggs", getEsAggQuery(getFullQueryTree().getRdfSubjectTypesList())); + return new EsQuery(esQueryDsl, indexNames); } @@ -268,6 +269,10 @@ private Map buildEsQueryDsl(Map mainQuery, Map