From f94bda39b10e43086fdda4403e97c2cb42ad5455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Tue, 17 Dec 2024 11:27:37 +0100 Subject: [PATCH 01/19] Add functionality for sorting facets/observations into a tree --- .../main/groovy/whelk/search2/FacetTree.java | 68 ++++++++ .../src/main/groovy/whelk/search2/Query.java | 3 + .../groovy/whelk/search2/FacetTreeSpec.groovy | 164 ++++++++++++++++++ 3 files changed, 235 insertions(+) create mode 100644 whelk-core/src/main/groovy/whelk/search2/FacetTree.java create mode 100644 whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java new file mode 100644 index 0000000000..6d11eed385 --- /dev/null +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -0,0 +1,68 @@ +package whelk.search2; + +import whelk.JsonLd; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.stream.Collectors; + +import static whelk.util.DocumentUtil.getAtPath; + +public class FacetTree { + + private final JsonLd jsonLd; + + public FacetTree(JsonLd jsonLd) { + this.jsonLd = jsonLd; + } + + public List> sortObservationsAsTree(List> observations) { + List> tree = new ArrayList<>(); + Queue> queue = new ConcurrentLinkedQueue<>(); + + observations.forEach(observation -> { + var parent = findParent(observation, observations); + + if (parent == null) { + tree.add(observation); + queue.add(observation); + } + }); + + while (!queue.isEmpty()) { + var observation = queue.remove(); + var children = findChildren(observation, observations); + if (!children.isEmpty()) { + queue.addAll(children); + observation.put("children", children); + } + } + return List.copyOf(tree); + } + + private Map findParent(Map observation, List> observations) { + return observations.stream() + .filter(o -> isSubCLass(observation, o)) + .findFirst().orElse(null); + } + + private List> findChildren(Map observation, List> observations) { + return observations.stream() + .filter(o -> isSubCLass(o, observation)) + .collect(Collectors.toList()); + } + + private boolean isSubCLass(Map obsA, Map obsB) { + String idA = get(obsA, List.of("object", "@id"), ""); + String idB = get(obsB, List.of("object", "@id"), ""); + return !idA.equals(idB) && jsonLd.isSubClassOf(idA, idB); + } + + @SuppressWarnings("unchecked") + private static T get(Object m, List path, T defaultTo) { + return (T) getAtPath(m, path, defaultTo); + } +} diff --git a/whelk-core/src/main/groovy/whelk/search2/Query.java b/whelk-core/src/main/groovy/whelk/search2/Query.java index 97e6ca674c..ecbf139ec9 100644 --- a/whelk-core/src/main/groovy/whelk/search2/Query.java +++ b/whelk-core/src/main/groovy/whelk/search2/Query.java @@ -463,6 +463,9 @@ private Map buildSliceByDimension(Map(); var observations = getObservations(propertyKey, buckets); + if (property.name().equals(Disambiguate.RDF_TYPE)) { + observations = new FacetTree(jsonLd).sortObservationsAsTree(observations); + } if (!observations.isEmpty()) { if (selectedFilters.isRangeFilter(propertyKey)) { sliceNode.put("search", getRangeTemplate(propertyKey)); diff --git a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy new file mode 100644 index 0000000000..f69bc1a349 --- /dev/null +++ b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy @@ -0,0 +1,164 @@ +package whelk.search2 + +import spock.lang.Specification +import whelk.JsonLd + +class FacetTreeSpec extends Specification { + + JsonLd jsonLd + + void setup() { + jsonLd = GroovyMock(JsonLd.class) + } + + def "Single observation should return list with one observation"() { + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "parent"]]] | [["object": ["@id": "parent"]]] + } + + def "Sort one parent and one child"() { + given: + jsonLd.isSubClassOf("child", "parent") >> { + true + } + jsonLd.isSubClassOf("parent", "child") >> { + false + } + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "parent"]], + ["object": ["@id": "child"]]] | [["object": ["@id": "parent"], "children": [["object": ["@id": "child"]]]]] + } + + def "Sort one parent with two children"() { + given: + jsonLd.isSubClassOf("child1", "parent") >> { + true + } + jsonLd.isSubClassOf("child2", "parent") >> { + true + } + jsonLd.isSubClassOf("parent", "child1") >> { + false + } + jsonLd.isSubClassOf("parent", "child2") >> { + false + } + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "parent"]], + ["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], + "children": [["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]]]] + } + + def "Sort one parent with one child that has one child"() { + given: + jsonLd.isSubClassOf("child1", "parent") >> { + true + } + jsonLd.isSubClassOf("child2", "parent") >> { + false + } + jsonLd.isSubClassOf("child2", "child1") >> { + true + } + jsonLd.isSubClassOf("parent", "child1") >> { + false + } + jsonLd.isSubClassOf("parent", "child2") >> { + false + } + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "parent"]], + ["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], + "children": [["object": ["@id": "child1"], + "children": [["object": ["@id": "child2"]]]]]]] + } + + def "One parent, two children"() { + given: + jsonLd.isSubClassOf("child1", "root") >> { + true + } + jsonLd.isSubClassOf("child2", "root") >> { + true + } + jsonLd.isSubClassOf("root", "child1") >> { + false + } + jsonLd.isSubClassOf("root", "child2") >> { + false + } + jsonLd.isSubClassOf("child1", "child2") >> { + false + } + jsonLd.isSubClassOf("child2", "child1") >> { + false + } + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "root"]], + ["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]] | [["object": ["@id": "root"], "children" : [["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]]]] + } + + def "Three root nodes"() { + given: + jsonLd.isSubClassOf(_, _) >> false + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "root1"]], + ["object": ["@id": "root2"]], + ["object": ["@id": "root3"]]] | [["object": ["@id": "root1"]], + ["object": ["@id": "root2"]], + ["object": ["@id": "root3"]]] + } + + def "Children should not be considered parents of themselves"() { + given: + jsonLd.isSubClassOf(_, _) >> true + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "A"]], ["object": ["@id": "A"]]] | [["object": ["@id": "A"]], ["object": ["@id": "A"]]] + } +} From 6ee6583207a6565bc3b322aa4204ece1e19bb3af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Wed, 18 Dec 2024 16:40:40 +0100 Subject: [PATCH 02/19] Fix method name --- whelk-core/src/main/groovy/whelk/search2/FacetTree.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index 6d11eed385..9620b5fc70 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -45,17 +45,17 @@ public List> sortObservationsAsTree(List private Map findParent(Map observation, List> observations) { return observations.stream() - .filter(o -> isSubCLass(observation, o)) + .filter(o -> isSubClass(observation, o)) .findFirst().orElse(null); } private List> findChildren(Map observation, List> observations) { return observations.stream() - .filter(o -> isSubCLass(o, observation)) + .filter(o -> isSubClass(o, observation)) .collect(Collectors.toList()); } - private boolean isSubCLass(Map obsA, Map obsB) { + private boolean isSubClass(Map obsA, Map obsB) { String idA = get(obsA, List.of("object", "@id"), ""); String idB = get(obsB, List.of("object", "@id"), ""); return !idA.equals(idB) && jsonLd.isSubClassOf(idA, idB); From f0ff792c7b43e5c2a7da5bf81a3e3feca8837775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Tue, 14 Jan 2025 11:05:56 +0100 Subject: [PATCH 03/19] Remove redundant use of JsonLd class --- .../main/groovy/whelk/search2/FacetTree.java | 10 +- .../src/main/groovy/whelk/search2/Stats.java | 315 ++++++++++++++++++ .../groovy/whelk/search2/FacetTreeSpec.groovy | 57 ++-- 3 files changed, 347 insertions(+), 35 deletions(-) create mode 100644 whelk-core/src/main/groovy/whelk/search2/Stats.java diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index 9620b5fc70..9d002d92d7 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -1,7 +1,5 @@ package whelk.search2; -import whelk.JsonLd; - import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -13,10 +11,10 @@ public class FacetTree { - private final JsonLd jsonLd; + private final Disambiguate disambiguate; - public FacetTree(JsonLd jsonLd) { - this.jsonLd = jsonLd; + public FacetTree(Disambiguate disambiguate) { + this.disambiguate = disambiguate; } public List> sortObservationsAsTree(List> observations) { @@ -58,7 +56,7 @@ private List> findChildren(Map observation, private boolean isSubClass(Map obsA, Map obsB) { String idA = get(obsA, List.of("object", "@id"), ""); String idB = get(obsB, List.of("object", "@id"), ""); - return !idA.equals(idB) && jsonLd.isSubClassOf(idA, idB); + return !idA.equals(idB) && disambiguate.isSubclassOf(idA, idB); } @SuppressWarnings("unchecked") diff --git a/whelk-core/src/main/groovy/whelk/search2/Stats.java b/whelk-core/src/main/groovy/whelk/search2/Stats.java new file mode 100644 index 0000000000..d7fe476664 --- /dev/null +++ b/whelk-core/src/main/groovy/whelk/search2/Stats.java @@ -0,0 +1,315 @@ +package whelk.search2; + +import whelk.JsonLd; +import whelk.search2.querytree.FreeText; +import whelk.search2.querytree.Link; +import whelk.search2.querytree.Literal; +import whelk.search2.querytree.Node; +import whelk.search2.querytree.PathValue; +import whelk.search2.querytree.Property; +import whelk.search2.querytree.PropertyValue; +import whelk.search2.querytree.QueryTree; +import whelk.search2.querytree.Value; +import whelk.search2.querytree.VocabTerm; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; + +import static whelk.search2.QueryUtil.makeFindUrl; +import static whelk.search2.QueryUtil.makeParams; + +public class Stats { + private final Disambiguate disambiguate; + private final QueryResult queryResult; + private final QueryParams queryParams; + private final AppParams appParams; + private final QueryTree queryTree; + private final QueryUtil queryUtil; + + public Stats(Disambiguate disambiguate, + QueryUtil queryUtil, + QueryTree queryTree, + QueryResult queryResult, + QueryParams queryParams, + AppParams appParams + ) { + this.disambiguate = disambiguate; + this.queryResult = queryResult; + this.queryParams = queryParams; + this.appParams = appParams; + this.queryTree = queryTree; + this.queryUtil = queryUtil; + } + + public Map build() { + var sliceByDimension = getSliceByDimension(); + var boolFilters = getBoolFilters(); + // TODO: Use Multi search API instead of doing two elastic roundtrips? + // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-multi-search.html + var predicates = getCuratedPredicateLinks(); + return Map.of(JsonLd.ID_KEY, "#stats", + "sliceByDimension", sliceByDimension, + "_boolFilters", boolFilters, + "_predicates", predicates); + } + + private Map getSliceByDimension() { + var buckets = collectBuckets(); + var rangeProps = appParams.statsRepr.getRangeProperties(); + return buildSliceByDimension(buckets, rangeProps); + } + + // Problem: Same value in different fields will be counted twice, e.g. contribution.agent + instanceOf.contribution.agent + private Map> collectBuckets() { + Map sliceParamsByProperty = appParams.statsRepr.getSliceByProperty(); + Map> propertyNameToBucketCounts = new LinkedHashMap<>(); + + // TODO: Decide how to handle properties that can appear at both instance and work level. + // Probably not the best idea to just add the counts together like we do now, since it's both inconvenient + // and not guaranteed to produce a correct number. + for (var agg : queryResult.aggs) { + for (var b : agg.buckets()) { + var buckets = propertyNameToBucketCounts.computeIfAbsent(agg.property(), x -> new HashMap<>()); + buckets.compute(b.value(), (k, v) -> v == null ? b.count() : v + b.count()); + } + } + + Map> propertyToBucketCounts = new LinkedHashMap<>(); + + for (Property property : sliceParamsByProperty.keySet()) { + var buckets = propertyNameToBucketCounts.get(property.name()); + if (buckets != null) { + int maxBuckets = sliceParamsByProperty.get(property).size(); + Map newBuckets = new LinkedHashMap<>(); + buckets.entrySet() + .stream() + .sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())) + .limit(Math.min(maxBuckets, buckets.size())) + .forEach(entry -> { + String bucketKey = entry.getKey(); + int count = entry.getValue(); + Value v; + if (property.hasVocabValue()) { + v = new VocabTerm(bucketKey, disambiguate.getDefinition(bucketKey)); + } else if (property.isObjectProperty()) { + v = new Link(bucketKey, disambiguate.getChip(bucketKey)); + } else { + v = new Literal(bucketKey); + } + newBuckets.put(new PropertyValue(property, Operator.EQUALS, v), count); + }); + propertyToBucketCounts.put(property, newBuckets); + } + } + + return propertyToBucketCounts; + } + + private Map buildSliceByDimension(Map> propToBuckets, Set rangeProps) { + Map nonQueryParams = queryParams.getNonQueryParams(0); + Map sliceParamsByProperty = appParams.statsRepr.getSliceByProperty(); + + Map sliceByDimension = new LinkedHashMap<>(); + + propToBuckets.forEach((property, buckets) -> { + var sliceNode = new LinkedHashMap<>(); + var isRange = rangeProps.contains(property); + var observations = getObservations(buckets, isRange ? queryTree.removeTopLevelPropValueWithRangeIfPropEquals(property) : queryTree, nonQueryParams); + if (property.name().equals(Disambiguate.Rdfs.RDF_TYPE)) { + observations = new FacetTree(disambiguate).sortObservationsAsTree(observations); + } + if (!observations.isEmpty()) { + if (isRange) { + sliceNode.put("search", getRangeTemplate(property, makeParams(nonQueryParams))); + } + sliceNode.put("dimension", property.name()); + sliceNode.put("observation", observations); + sliceNode.put("maxItems", sliceParamsByProperty.get(property).size()); + sliceByDimension.put(property.name(), sliceNode); + } + }); + + return sliceByDimension; + } + + private List> getObservations(Map buckets, QueryTree qt, Map nonQueryParams) { + List> observations = new ArrayList<>(); + + buckets.forEach((pv, count) -> { + Map observation = new LinkedHashMap<>(); + boolean queried = qt.getTopLevelPvNodes().contains(pv) + || pv.value().string().equals(nonQueryParams.get(QueryParams.ApiParams.OBJECT)); + if (!queried) { + observation.put("totalItems", count); + var url = makeFindUrl(qt.addToTopLevel(pv), nonQueryParams); + observation.put("view", Map.of(JsonLd.ID_KEY, url)); + observation.put("object", pv.value().description()); + observations.add(observation); + } + }); + + return observations; + } + + private Map getRangeTemplate(Property property, List nonQueryParams) { + var GtLtNodes = queryTree.getTopLevelPvNodes().stream() + .filter(pv -> pv.property().equals(property)) + .filter(pv -> switch (pv.operator()) { + case EQUALS, NOT_EQUALS -> false; + case GREATER_THAN_OR_EQUALS, GREATER_THAN, LESS_THAN_OR_EQUALS, LESS_THAN -> true; + }) + .filter(pv -> pv.value().isNumeric()) + .toList(); + + String min = null; + String max = null; + + for (var pv : GtLtNodes) { + var orEquals = pv.toOrEquals(); + if (orEquals.operator() == Operator.GREATER_THAN_OR_EQUALS && min == null) { + min = orEquals.value().string(); + } else if (orEquals.operator() == Operator.LESS_THAN_OR_EQUALS && max == null) { + max = orEquals.value().string(); + } else { + // Not a proper range, reset and abort + min = null; + max = null; + break; + } + } + + var tree = queryTree.removeTopLevelPropValueIfPropEquals(property); + + Map template = new LinkedHashMap<>(); + + var placeholderNode = new FreeText(Operator.EQUALS, String.format("{?%s}", property.name())); + var templateQueryString = tree.addToTopLevel(placeholderNode).toString(); + var templateUrl = makeFindUrl(tree.getTopLevelFreeText(), templateQueryString, nonQueryParams); + template.put("template", templateUrl); + + var mapping = new LinkedHashMap<>(); + mapping.put("variable", property.name()); + mapping.put(Operator.GREATER_THAN_OR_EQUALS.termKey, Objects.toString(min, "")); + mapping.put(Operator.LESS_THAN_OR_EQUALS.termKey, Objects.toString(max, "")); + template.put("mapping", mapping); + + return template; + } + + private List> getBoolFilters() { + var nonQueryParams = queryParams.getNonQueryParams(0); + List> results = new ArrayList<>(); + var existing = queryTree.getTopLevelNodes(); + + for (var f : appParams.siteFilters.optionalFilters()) { + QueryTree newTree; + boolean isSelected; + Node filter = f.getAlias().orElse(f.getExplicit()); + + if (existing.contains(filter)) { + newTree = queryTree.removeTopLevelNode(filter); + isSelected = true; + } else { + newTree = queryTree.addToTopLevel(filter); + isSelected = false; + } + + Map res = new LinkedHashMap<>(); + // TODO: fix form + res.put("totalItems", 0); + res.put("object", Map.of(JsonLd.TYPE_KEY, "Resource", "prefLabelByLang", f.getPrefLabelByLang())); + res.put("view", Map.of(JsonLd.ID_KEY, makeFindUrl(newTree, nonQueryParams))); + res.put("_selected", isSelected); + + results.add(res); + } + + return results; + } + + // TODO naming things "curated predicate links" ?? + private List> getCuratedPredicateLinks() { + var o = getObject(queryParams); + if (o == null) { + return Collections.emptyList(); + } + var curatedPredicates = curatedPredicates(o, appParams.relationFilters); + if (curatedPredicates.isEmpty()) { + return Collections.emptyList(); + } + QueryResult queryRes = new QueryResult(queryUtil.query(getCuratedPredicateEsQueryDsl(o, curatedPredicates))); + return predicateLinks(queryRes.pAggs, o, queryParams.getNonQueryParams(0)); + } + + private List curatedPredicates(Entity object, Map> relationFilters) { + return object.superclassesIncludingSelf() + .stream() + .filter(relationFilters::containsKey) + .findFirst().map(relationFilters::get) + .orElse(Collections.emptyList()); + } + + private Map getCuratedPredicateEsQueryDsl(Entity o, List curatedPredicates) { + var queryDsl = new LinkedHashMap(); + queryDsl.put("query", new PathValue("_links", Operator.EQUALS, o.id()).toEs()); + queryDsl.put("size", 0); + queryDsl.put("from", 0); + queryDsl.put("aggs", Aggs.buildPAggQuery(o, curatedPredicates, disambiguate)); + queryDsl.put("track_total_hits", true); + return queryDsl; + } + + private List> predicateLinks(List aggs, Entity object, + Map nonQueryParams) { + var result = new ArrayList>(); + Set selected = new HashSet<>(); + if (nonQueryParams.containsKey(QueryParams.ApiParams.PREDICATES)) { + selected.add(nonQueryParams.get(QueryParams.ApiParams.PREDICATES)); + } + + Map counts = aggs.stream().collect(Collectors.toMap(Aggs.Bucket::value, Aggs.Bucket::count)); + + for (String p : curatedPredicates(object, appParams.relationFilters)) { + if (!counts.containsKey(p)) { + continue; + } + + int count = counts.get(p); + + if (count > 0) { + Map params = new HashMap<>(nonQueryParams); + params.put(QueryParams.ApiParams.PREDICATES, p); + result.add(Map.of( + "totalItems", count, + "view", Map.of(JsonLd.ID_KEY, makeFindUrl(makeParams(params))), + "object", disambiguate.getDefinition(p), + "_selected", selected.contains(p) + )); + } + } + + return result; + } + + private Entity getObject(QueryParams queryParams) { + var object = queryParams.object; + if (object != null) { + return queryUtil.loadThing(object) + .map(thing -> thing.getOrDefault(JsonLd.TYPE_KEY, null)) + .map(JsonLd::asList) + .map(List::getFirst) + .map(type -> new Entity(object, (String) type, disambiguate.getSuperclasses((String) type))) + .orElse(null); + } + return null; + } +} diff --git a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy index f69bc1a349..709218ef07 100644 --- a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy @@ -1,19 +1,18 @@ package whelk.search2 import spock.lang.Specification -import whelk.JsonLd class FacetTreeSpec extends Specification { - JsonLd jsonLd + Disambiguate disambiguate void setup() { - jsonLd = GroovyMock(JsonLd.class) + disambiguate = GroovyMock(Disambiguate.class) } def "Single observation should return list with one observation"() { expect: - def tree = new FacetTree(jsonLd) + def tree = new FacetTree(disambiguate) tree.sortObservationsAsTree(observations) == sorted where: @@ -23,15 +22,15 @@ class FacetTreeSpec extends Specification { def "Sort one parent and one child"() { given: - jsonLd.isSubClassOf("child", "parent") >> { + disambiguate.isSubclassOf("child", "parent") >> { true } - jsonLd.isSubClassOf("parent", "child") >> { + disambiguate.isSubclassOf("parent", "child") >> { false } expect: - def tree = new FacetTree(jsonLd) + def tree = new FacetTree(disambiguate) tree.sortObservationsAsTree(observations) == sorted where: @@ -42,21 +41,21 @@ class FacetTreeSpec extends Specification { def "Sort one parent with two children"() { given: - jsonLd.isSubClassOf("child1", "parent") >> { + disambiguate.isSubclassOf("child1", "parent") >> { true } - jsonLd.isSubClassOf("child2", "parent") >> { + disambiguate.isSubclassOf("child2", "parent") >> { true } - jsonLd.isSubClassOf("parent", "child1") >> { + disambiguate.isSubclassOf("parent", "child1") >> { false } - jsonLd.isSubClassOf("parent", "child2") >> { + disambiguate.isSubclassOf("parent", "child2") >> { false } expect: - def tree = new FacetTree(jsonLd) + def tree = new FacetTree(disambiguate) tree.sortObservationsAsTree(observations) == sorted where: @@ -70,24 +69,24 @@ class FacetTreeSpec extends Specification { def "Sort one parent with one child that has one child"() { given: - jsonLd.isSubClassOf("child1", "parent") >> { + disambiguate.isSubclassOf("child1", "parent") >> { true } - jsonLd.isSubClassOf("child2", "parent") >> { + disambiguate.isSubclassOf("child2", "parent") >> { false } - jsonLd.isSubClassOf("child2", "child1") >> { + disambiguate.isSubclassOf("child2", "child1") >> { true } - jsonLd.isSubClassOf("parent", "child1") >> { + disambiguate.isSubclassOf("parent", "child1") >> { false } - jsonLd.isSubClassOf("parent", "child2") >> { + disambiguate.isSubclassOf("parent", "child2") >> { false } expect: - def tree = new FacetTree(jsonLd) + def tree = new FacetTree(disambiguate) tree.sortObservationsAsTree(observations) == sorted where: @@ -101,27 +100,27 @@ class FacetTreeSpec extends Specification { def "One parent, two children"() { given: - jsonLd.isSubClassOf("child1", "root") >> { + disambiguate.isSubclassOf("child1", "root") >> { true } - jsonLd.isSubClassOf("child2", "root") >> { + disambiguate.isSubclassOf("child2", "root") >> { true } - jsonLd.isSubClassOf("root", "child1") >> { + disambiguate.isSubclassOf("root", "child1") >> { false } - jsonLd.isSubClassOf("root", "child2") >> { + disambiguate.isSubclassOf("root", "child2") >> { false } - jsonLd.isSubClassOf("child1", "child2") >> { + disambiguate.isSubclassOf("child1", "child2") >> { false } - jsonLd.isSubClassOf("child2", "child1") >> { + disambiguate.isSubclassOf("child2", "child1") >> { false } expect: - def tree = new FacetTree(jsonLd) + def tree = new FacetTree(disambiguate) tree.sortObservationsAsTree(observations) == sorted where: @@ -134,10 +133,10 @@ class FacetTreeSpec extends Specification { def "Three root nodes"() { given: - jsonLd.isSubClassOf(_, _) >> false + disambiguate.isSubclassOf(_, _) >> false expect: - def tree = new FacetTree(jsonLd) + def tree = new FacetTree(disambiguate) tree.sortObservationsAsTree(observations) == sorted where: @@ -151,10 +150,10 @@ class FacetTreeSpec extends Specification { def "Children should not be considered parents of themselves"() { given: - jsonLd.isSubClassOf(_, _) >> true + disambiguate.isSubclassOf(_, _) >> true expect: - def tree = new FacetTree(jsonLd) + def tree = new FacetTree(disambiguate) tree.sortObservationsAsTree(observations) == sorted where: From a09e6e1f43eb87981502a512ff59063615526a3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Tue, 14 Jan 2025 11:33:59 +0100 Subject: [PATCH 04/19] Catch both instance and work types --- whelk-core/src/main/groovy/whelk/search2/Stats.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whelk-core/src/main/groovy/whelk/search2/Stats.java b/whelk-core/src/main/groovy/whelk/search2/Stats.java index d7fe476664..2e48cb8933 100644 --- a/whelk-core/src/main/groovy/whelk/search2/Stats.java +++ b/whelk-core/src/main/groovy/whelk/search2/Stats.java @@ -124,7 +124,7 @@ private Map buildSliceByDimension(Map(); var isRange = rangeProps.contains(property); var observations = getObservations(buckets, isRange ? queryTree.removeTopLevelPropValueWithRangeIfPropEquals(property) : queryTree, nonQueryParams); - if (property.name().equals(Disambiguate.Rdfs.RDF_TYPE)) { + if (property.isType()) { observations = new FacetTree(disambiguate).sortObservationsAsTree(observations); } if (!observations.isEmpty()) { From 0fa5ee8aa34cc793f1cbb346c800d5bf7b1680a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Thu, 16 Jan 2025 10:34:09 +0100 Subject: [PATCH 05/19] children -> _children --- .../src/main/groovy/whelk/search2/FacetTree.java | 2 +- .../src/test/groovy/whelk/search2/FacetTreeSpec.groovy | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index 9d002d92d7..b6721d3b9c 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -35,7 +35,7 @@ public List> sortObservationsAsTree(List var children = findChildren(observation, observations); if (!children.isEmpty()) { queue.addAll(children); - observation.put("children", children); + observation.put("_children", children); } } return List.copyOf(tree); diff --git a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy index 709218ef07..7bf327ed86 100644 --- a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy @@ -36,7 +36,7 @@ class FacetTreeSpec extends Specification { where: observations | sorted [["object": ["@id": "parent"]], - ["object": ["@id": "child"]]] | [["object": ["@id": "parent"], "children": [["object": ["@id": "child"]]]]] + ["object": ["@id": "child"]]] | [["object": ["@id": "parent"], "_children": [["object": ["@id": "child"]]]]] } def "Sort one parent with two children"() { @@ -63,7 +63,7 @@ class FacetTreeSpec extends Specification { [["object": ["@id": "parent"]], ["object": ["@id": "child1"]], ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], - "children": [["object": ["@id": "child1"]], + "_children": [["object": ["@id": "child1"]], ["object": ["@id": "child2"]]]]] } @@ -94,8 +94,8 @@ class FacetTreeSpec extends Specification { [["object": ["@id": "parent"]], ["object": ["@id": "child1"]], ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], - "children": [["object": ["@id": "child1"], - "children": [["object": ["@id": "child2"]]]]]]] + "_children": [["object": ["@id": "child1"], + "_children": [["object": ["@id": "child2"]]]]]]] } def "One parent, two children"() { @@ -127,7 +127,7 @@ class FacetTreeSpec extends Specification { observations | sorted [["object": ["@id": "root"]], ["object": ["@id": "child1"]], - ["object": ["@id": "child2"]]] | [["object": ["@id": "root"], "children" : [["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]] | [["object": ["@id": "root"], "_children" : [["object": ["@id": "child1"]], ["object": ["@id": "child2"]]]]] } From ca3bcefd67dea14518eee066605146654f07e291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Fri, 17 Jan 2025 17:38:11 +0100 Subject: [PATCH 06/19] Revert "Remove redundant use of JsonLd class" This reverts commit c15d1793bfa3b5f7cde351fdca2237fda62f5a16. --- .../main/groovy/whelk/search2/FacetTree.java | 10 ++-- .../src/main/groovy/whelk/search2/Stats.java | 8 ++- .../groovy/whelk/search2/FacetTreeSpec.groovy | 57 ++++++++++--------- 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index b6721d3b9c..5ddd451932 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -1,5 +1,7 @@ package whelk.search2; +import whelk.JsonLd; + import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -11,10 +13,10 @@ public class FacetTree { - private final Disambiguate disambiguate; + private final JsonLd jsonLd; - public FacetTree(Disambiguate disambiguate) { - this.disambiguate = disambiguate; + public FacetTree(JsonLd jsonLd) { + this.jsonLd = jsonLd; } public List> sortObservationsAsTree(List> observations) { @@ -56,7 +58,7 @@ private List> findChildren(Map observation, private boolean isSubClass(Map obsA, Map obsB) { String idA = get(obsA, List.of("object", "@id"), ""); String idB = get(obsB, List.of("object", "@id"), ""); - return !idA.equals(idB) && disambiguate.isSubclassOf(idA, idB); + return !idA.equals(idB) && jsonLd.isSubClassOf(idA, idB); } @SuppressWarnings("unchecked") diff --git a/whelk-core/src/main/groovy/whelk/search2/Stats.java b/whelk-core/src/main/groovy/whelk/search2/Stats.java index 2e48cb8933..be9118d4dc 100644 --- a/whelk-core/src/main/groovy/whelk/search2/Stats.java +++ b/whelk-core/src/main/groovy/whelk/search2/Stats.java @@ -34,13 +34,15 @@ public class Stats { private final AppParams appParams; private final QueryTree queryTree; private final QueryUtil queryUtil; + private final JsonLd jsonLd; public Stats(Disambiguate disambiguate, QueryUtil queryUtil, QueryTree queryTree, QueryResult queryResult, QueryParams queryParams, - AppParams appParams + AppParams appParams, + JsonLd jsonLd ) { this.disambiguate = disambiguate; this.queryResult = queryResult; @@ -48,6 +50,7 @@ public Stats(Disambiguate disambiguate, this.appParams = appParams; this.queryTree = queryTree; this.queryUtil = queryUtil; + this.jsonLd = jsonLd; } public Map build() { @@ -124,8 +127,9 @@ private Map buildSliceByDimension(Map(); var isRange = rangeProps.contains(property); var observations = getObservations(buckets, isRange ? queryTree.removeTopLevelPropValueWithRangeIfPropEquals(property) : queryTree, nonQueryParams); + if (property.isType()) { - observations = new FacetTree(disambiguate).sortObservationsAsTree(observations); + observations = new FacetTree(jsonLd).sortObservationsAsTree(observations); } if (!observations.isEmpty()) { if (isRange) { diff --git a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy index 7bf327ed86..ab664d3572 100644 --- a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy @@ -1,18 +1,19 @@ package whelk.search2 import spock.lang.Specification +import whelk.JsonLd class FacetTreeSpec extends Specification { - Disambiguate disambiguate + JsonLd jsonLd void setup() { - disambiguate = GroovyMock(Disambiguate.class) + jsonLd = GroovyMock(JsonLd.class) } def "Single observation should return list with one observation"() { expect: - def tree = new FacetTree(disambiguate) + def tree = new FacetTree(jsonLd) tree.sortObservationsAsTree(observations) == sorted where: @@ -22,15 +23,15 @@ class FacetTreeSpec extends Specification { def "Sort one parent and one child"() { given: - disambiguate.isSubclassOf("child", "parent") >> { + jsonLd.isSubClassOf("child", "parent") >> { true } - disambiguate.isSubclassOf("parent", "child") >> { + jsonLd.isSubClassOf("parent", "child") >> { false } expect: - def tree = new FacetTree(disambiguate) + def tree = new FacetTree(jsonLd) tree.sortObservationsAsTree(observations) == sorted where: @@ -41,21 +42,21 @@ class FacetTreeSpec extends Specification { def "Sort one parent with two children"() { given: - disambiguate.isSubclassOf("child1", "parent") >> { + jsonLd.isSubClassOf("child1", "parent") >> { true } - disambiguate.isSubclassOf("child2", "parent") >> { + jsonLd.isSubClassOf("child2", "parent") >> { true } - disambiguate.isSubclassOf("parent", "child1") >> { + jsonLd.isSubClassOf("parent", "child1") >> { false } - disambiguate.isSubclassOf("parent", "child2") >> { + jsonLd.isSubClassOf("parent", "child2") >> { false } expect: - def tree = new FacetTree(disambiguate) + def tree = new FacetTree(jsonLd) tree.sortObservationsAsTree(observations) == sorted where: @@ -69,24 +70,24 @@ class FacetTreeSpec extends Specification { def "Sort one parent with one child that has one child"() { given: - disambiguate.isSubclassOf("child1", "parent") >> { + jsonLd.isSubClassOf("child1", "parent") >> { true } - disambiguate.isSubclassOf("child2", "parent") >> { + jsonLd.isSubClassOf("child2", "parent") >> { false } - disambiguate.isSubclassOf("child2", "child1") >> { + jsonLd.isSubClassOf("child2", "child1") >> { true } - disambiguate.isSubclassOf("parent", "child1") >> { + jsonLd.isSubClassOf("parent", "child1") >> { false } - disambiguate.isSubclassOf("parent", "child2") >> { + jsonLd.isSubClassOf("parent", "child2") >> { false } expect: - def tree = new FacetTree(disambiguate) + def tree = new FacetTree(jsonLd) tree.sortObservationsAsTree(observations) == sorted where: @@ -100,27 +101,27 @@ class FacetTreeSpec extends Specification { def "One parent, two children"() { given: - disambiguate.isSubclassOf("child1", "root") >> { + jsonLd.isSubClassOf("child1", "root") >> { true } - disambiguate.isSubclassOf("child2", "root") >> { + jsonLd.isSubClassOf("child2", "root") >> { true } - disambiguate.isSubclassOf("root", "child1") >> { + jsonLd.isSubClassOf("root", "child1") >> { false } - disambiguate.isSubclassOf("root", "child2") >> { + jsonLd.isSubClassOf("root", "child2") >> { false } - disambiguate.isSubclassOf("child1", "child2") >> { + jsonLd.isSubClassOf("child1", "child2") >> { false } - disambiguate.isSubclassOf("child2", "child1") >> { + jsonLd.isSubClassOf("child2", "child1") >> { false } expect: - def tree = new FacetTree(disambiguate) + def tree = new FacetTree(jsonLd) tree.sortObservationsAsTree(observations) == sorted where: @@ -133,10 +134,10 @@ class FacetTreeSpec extends Specification { def "Three root nodes"() { given: - disambiguate.isSubclassOf(_, _) >> false + jsonLd.isSubClassOf(_, _) >> false expect: - def tree = new FacetTree(disambiguate) + def tree = new FacetTree(jsonLd) tree.sortObservationsAsTree(observations) == sorted where: @@ -150,10 +151,10 @@ class FacetTreeSpec extends Specification { def "Children should not be considered parents of themselves"() { given: - disambiguate.isSubclassOf(_, _) >> true + jsonLd.isSubClassOf(_, _) >> true expect: - def tree = new FacetTree(disambiguate) + def tree = new FacetTree(jsonLd) tree.sortObservationsAsTree(observations) == sorted where: From 6c096dca49c16768be679d0a046c528f85f1a1bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Fri, 17 Jan 2025 17:40:15 +0100 Subject: [PATCH 07/19] Iri -> termkey for comparison to work --- whelk-core/src/main/groovy/whelk/search2/FacetTree.java | 4 ++-- whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index 5ddd451932..6ed94b9304 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -56,8 +56,8 @@ private List> findChildren(Map observation, } private boolean isSubClass(Map obsA, Map obsB) { - String idA = get(obsA, List.of("object", "@id"), ""); - String idB = get(obsB, List.of("object", "@id"), ""); + String idA = jsonLd.toTermKey(get(obsA, List.of("object", "@id"), "")); + String idB = jsonLd.toTermKey(get(obsB, List.of("object", "@id"), "")); return !idA.equals(idB) && jsonLd.isSubClassOf(idA, idB); } diff --git a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy index ab664d3572..59a35adc42 100644 --- a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy @@ -9,6 +9,7 @@ class FacetTreeSpec extends Specification { void setup() { jsonLd = GroovyMock(JsonLd.class) + jsonLd.toTermKey(_ as String) >> { String s -> s } } def "Single observation should return list with one observation"() { From fc5bb0f7501ba63acfc15cd79aeec526f324f43a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Mon, 20 Jan 2025 15:29:43 +0100 Subject: [PATCH 08/19] Only care about classes with a direct subclass relationship --- .../src/main/groovy/whelk/JsonLd.groovy | 4 ++ .../main/groovy/whelk/search2/FacetTree.java | 7 +- .../groovy/whelk/search2/FacetTreeSpec.groovy | 66 ++++--------------- 3 files changed, 23 insertions(+), 54 deletions(-) diff --git a/whelk-core/src/main/groovy/whelk/JsonLd.groovy b/whelk-core/src/main/groovy/whelk/JsonLd.groovy index b254357dc5..5860df6c31 100644 --- a/whelk-core/src/main/groovy/whelk/JsonLd.groovy +++ b/whelk-core/src/main/groovy/whelk/JsonLd.groovy @@ -730,6 +730,10 @@ class JsonLd { return type in bases } + List getDirectSubclasses(String type) { + return superClassOf.get(type) + } + boolean isInstanceOf(Map entity, String baseType) { if (entity.is(null)) { return false diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index 6ed94b9304..614b9454b1 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -58,7 +58,12 @@ private List> findChildren(Map observation, private boolean isSubClass(Map obsA, Map obsB) { String idA = jsonLd.toTermKey(get(obsA, List.of("object", "@id"), "")); String idB = jsonLd.toTermKey(get(obsB, List.of("object", "@id"), "")); - return !idA.equals(idB) && jsonLd.isSubClassOf(idA, idB); + return !idA.equals(idB) && isDirectSubClass(idA, idB); + } + + private boolean isDirectSubClass(String type, String base) { + List directSubclasses = jsonLd.getDirectSubclasses(base); + return directSubclasses.contains(type); } @SuppressWarnings("unchecked") diff --git a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy index 59a35adc42..0cf0ee344f 100644 --- a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy @@ -24,12 +24,8 @@ class FacetTreeSpec extends Specification { def "Sort one parent and one child"() { given: - jsonLd.isSubClassOf("child", "parent") >> { - true - } - jsonLd.isSubClassOf("parent", "child") >> { - false - } + jsonLd.getDirectSubclasses("parent") >> ["child"] + jsonLd.getDirectSubclasses("child") >> [] expect: def tree = new FacetTree(jsonLd) @@ -43,18 +39,9 @@ class FacetTreeSpec extends Specification { def "Sort one parent with two children"() { given: - jsonLd.isSubClassOf("child1", "parent") >> { - true - } - jsonLd.isSubClassOf("child2", "parent") >> { - true - } - jsonLd.isSubClassOf("parent", "child1") >> { - false - } - jsonLd.isSubClassOf("parent", "child2") >> { - false - } + jsonLd.getDirectSubclasses("parent") >> ["child1", "child2"] + jsonLd.getDirectSubclasses("child1") >> [] + jsonLd.getDirectSubclasses("child2") >> [] expect: def tree = new FacetTree(jsonLd) @@ -71,21 +58,9 @@ class FacetTreeSpec extends Specification { def "Sort one parent with one child that has one child"() { given: - jsonLd.isSubClassOf("child1", "parent") >> { - true - } - jsonLd.isSubClassOf("child2", "parent") >> { - false - } - jsonLd.isSubClassOf("child2", "child1") >> { - true - } - jsonLd.isSubClassOf("parent", "child1") >> { - false - } - jsonLd.isSubClassOf("parent", "child2") >> { - false - } + jsonLd.getDirectSubclasses("parent") >> ["child1"] + jsonLd.getDirectSubclasses("child1") >> ["child2"] + jsonLd.getDirectSubclasses("child2") >> [] expect: def tree = new FacetTree(jsonLd) @@ -102,24 +77,9 @@ class FacetTreeSpec extends Specification { def "One parent, two children"() { given: - jsonLd.isSubClassOf("child1", "root") >> { - true - } - jsonLd.isSubClassOf("child2", "root") >> { - true - } - jsonLd.isSubClassOf("root", "child1") >> { - false - } - jsonLd.isSubClassOf("root", "child2") >> { - false - } - jsonLd.isSubClassOf("child1", "child2") >> { - false - } - jsonLd.isSubClassOf("child2", "child1") >> { - false - } + jsonLd.getDirectSubclasses("root") >> ["child1", "child2"] + jsonLd.getDirectSubclasses("child1") >> [] + jsonLd.getDirectSubclasses("child2") >> [] expect: def tree = new FacetTree(jsonLd) @@ -135,7 +95,7 @@ class FacetTreeSpec extends Specification { def "Three root nodes"() { given: - jsonLd.isSubClassOf(_, _) >> false + jsonLd.getDirectSubclasses(_) >> [] expect: def tree = new FacetTree(jsonLd) @@ -152,7 +112,7 @@ class FacetTreeSpec extends Specification { def "Children should not be considered parents of themselves"() { given: - jsonLd.isSubClassOf(_, _) >> true + jsonLd.getDirectSubclasses(_) >> [] expect: def tree = new FacetTree(jsonLd) From 89dc8f8d84394454edfb3db9f27d941266dc81f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Mon, 20 Jan 2025 15:53:27 +0100 Subject: [PATCH 09/19] Don't return null --- whelk-core/src/main/groovy/whelk/JsonLd.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whelk-core/src/main/groovy/whelk/JsonLd.groovy b/whelk-core/src/main/groovy/whelk/JsonLd.groovy index 5860df6c31..be7c48f2a6 100644 --- a/whelk-core/src/main/groovy/whelk/JsonLd.groovy +++ b/whelk-core/src/main/groovy/whelk/JsonLd.groovy @@ -731,7 +731,7 @@ class JsonLd { } List getDirectSubclasses(String type) { - return superClassOf.get(type) + return superClassOf.get(type) ?: [] } boolean isInstanceOf(Map entity, String baseType) { From 403cdaa5d93c91daaa44ba75bf6f23d7760cedda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Thu, 23 Jan 2025 10:35:11 +0100 Subject: [PATCH 10/19] Create intermediate, empty observations for missing classes in the tree --- .../src/main/groovy/whelk/JsonLd.groovy | 13 +- .../main/groovy/whelk/search2/FacetTree.java | 66 ++-- .../groovy/whelk/search2/FacetTreeSpec.groovy | 305 ++++++++++++------ 3 files changed, 266 insertions(+), 118 deletions(-) diff --git a/whelk-core/src/main/groovy/whelk/JsonLd.groovy b/whelk-core/src/main/groovy/whelk/JsonLd.groovy index be7c48f2a6..f4623f3b60 100644 --- a/whelk-core/src/main/groovy/whelk/JsonLd.groovy +++ b/whelk-core/src/main/groovy/whelk/JsonLd.groovy @@ -670,12 +670,6 @@ class JsonLd { //==== Class-hierarchies ==== - List getSuperClasses(String type) { - List res = [] - getSuperClasses(type, res) - return res - } - void getSuperClasses(String type, List result) { def termMap = vocabIndex[type] if (termMap == null) @@ -695,6 +689,13 @@ class JsonLd { } } + // Returns all superclasses in an ordered List of typeKeys + List getSuperClasses(String type) { + List allSuperClasses = new ArrayList<>() + getSuperClasses(type, allSuperClasses) + return allSuperClasses + } + private Map> generateSubTermLists(String relationToSuper) { Map> superTermOf = [:] for (String type : vocabIndex.keySet()) { diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index 614b9454b1..451cbf23d8 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -2,10 +2,7 @@ import whelk.JsonLd; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Queue; +import java.util.*; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.stream.Collectors; @@ -14,6 +11,7 @@ public class FacetTree { private final JsonLd jsonLd; + private List observationsAsTypeKeys = Collections.emptyList(); public FacetTree(JsonLd jsonLd) { this.jsonLd = jsonLd; @@ -22,19 +20,28 @@ public FacetTree(JsonLd jsonLd) { public List> sortObservationsAsTree(List> observations) { List> tree = new ArrayList<>(); Queue> queue = new ConcurrentLinkedQueue<>(); + List intermediateClasses = new ArrayList<>(); - observations.forEach(observation -> { - var parent = findParent(observation, observations); + observationsAsTypeKeys = observations.stream() + .map(o -> jsonLd.toTermKey(get(o, List.of("object", "@id"), ""))) + .toList(); - if (parent == null) { + observations.forEach(observation -> { + if (isRootNode(observation)) { tree.add(observation); queue.add(observation); + } else { + intermediateClasses.addAll(getAbsentSuperClasses(observation)); } }); + // Create empty observations for intermediate classes with 0 totalHits + observations.addAll(intermediateClasses.stream().map(this::createFakeObservation).toList()); + while (!queue.isEmpty()) { var observation = queue.remove(); var children = findChildren(observation, observations); + if (!children.isEmpty()) { queue.addAll(children); observation.put("_children", children); @@ -43,27 +50,48 @@ public List> sortObservationsAsTree(List return List.copyOf(tree); } - private Map findParent(Map observation, List> observations) { - return observations.stream() - .filter(o -> isSubClass(observation, o)) - .findFirst().orElse(null); + private Map createFakeObservation(String termKey) { + Map fakeObservation = new LinkedHashMap<>(); + String termId = jsonLd.toTermId(termKey); + var fakeObject = Map.of(JsonLd.ID_KEY, termId); + fakeObservation.put("totalItems", 0); + fakeObservation.put("view", Map.of(JsonLd.ID_KEY, "fake")); + fakeObservation.put("object", fakeObject); + return fakeObservation; + } + + private List getAbsentSuperClasses(Map observation) { + String typeKey = jsonLd.toTermKey(get(observation, List.of("object", "@id"), "")); + List allSuperClasses = jsonLd.getSuperClasses(typeKey); + + return allSuperClasses.stream() + .takeWhile(s -> !observationsAsTypeKeys.contains(s)) + .toList(); + } + + private boolean hasParentInObservations(Map observation) { + String typeKey = jsonLd.toTermKey(get(observation, List.of("object", "@id"), "")); + List allSuperClasses = jsonLd.getSuperClasses(typeKey); + + return allSuperClasses.stream() + .anyMatch(s -> observationsAsTypeKeys.contains(s)); + } + + private boolean isRootNode(Map observation) { + return !hasParentInObservations(observation); } private List> findChildren(Map observation, List> observations) { return observations.stream() - .filter(o -> isSubClass(o, observation)) + .filter(o -> isDirectSubclass(o, observation)) .collect(Collectors.toList()); } - private boolean isSubClass(Map obsA, Map obsB) { + private boolean isDirectSubclass(Map obsA, Map obsB) { String idA = jsonLd.toTermKey(get(obsA, List.of("object", "@id"), "")); String idB = jsonLd.toTermKey(get(obsB, List.of("object", "@id"), "")); - return !idA.equals(idB) && isDirectSubClass(idA, idB); - } - - private boolean isDirectSubClass(String type, String base) { - List directSubclasses = jsonLd.getDirectSubclasses(base); - return directSubclasses.contains(type); + List directSubclasses = jsonLd.getDirectSubclasses(idB); + return directSubclasses.contains(idA); } @SuppressWarnings("unchecked") diff --git a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy index 0cf0ee344f..2e1bb97b73 100644 --- a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy @@ -10,77 +10,82 @@ class FacetTreeSpec extends Specification { void setup() { jsonLd = GroovyMock(JsonLd.class) jsonLd.toTermKey(_ as String) >> { String s -> s } + jsonLd.toTermId(_ as String) >> { String s -> s } } - def "Single observation should return list with one observation"() { - expect: - def tree = new FacetTree(jsonLd) - tree.sortObservationsAsTree(observations) == sorted - - where: - observations | sorted - [["object": ["@id": "parent"]]] | [["object": ["@id": "parent"]]] - } - - def "Sort one parent and one child"() { - given: - jsonLd.getDirectSubclasses("parent") >> ["child"] - jsonLd.getDirectSubclasses("child") >> [] - - expect: - def tree = new FacetTree(jsonLd) - tree.sortObservationsAsTree(observations) == sorted - - where: - observations | sorted - [["object": ["@id": "parent"]], - ["object": ["@id": "child"]]] | [["object": ["@id": "parent"], "_children": [["object": ["@id": "child"]]]]] - } - - def "Sort one parent with two children"() { - given: - jsonLd.getDirectSubclasses("parent") >> ["child1", "child2"] - jsonLd.getDirectSubclasses("child1") >> [] - jsonLd.getDirectSubclasses("child2") >> [] - - expect: - def tree = new FacetTree(jsonLd) - tree.sortObservationsAsTree(observations) == sorted - - where: - observations | sorted - [["object": ["@id": "parent"]], - ["object": ["@id": "child1"]], - ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], - "_children": [["object": ["@id": "child1"]], - ["object": ["@id": "child2"]]]]] - } - - def "Sort one parent with one child that has one child"() { - given: - jsonLd.getDirectSubclasses("parent") >> ["child1"] - jsonLd.getDirectSubclasses("child1") >> ["child2"] - jsonLd.getDirectSubclasses("child2") >> [] - - expect: - def tree = new FacetTree(jsonLd) - tree.sortObservationsAsTree(observations) == sorted - - where: - observations | sorted - [["object": ["@id": "parent"]], - ["object": ["@id": "child1"]], - ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], - "_children": [["object": ["@id": "child1"], - "_children": [["object": ["@id": "child2"]]]]]]] - } - +// def "Single observation should return list with one observation"() { +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "parent"]]] | [["object": ["@id": "parent"]]] +// } +// +// def "Sort one parent and one child"() { +// given: +// jsonLd.getDirectSubclasses("parent") >> ["child"] +// jsonLd.getDirectSubclasses("child") >> [] +// +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "parent"]], +// ["object": ["@id": "child"]]] | [["object": ["@id": "parent"], "_children": [["object": ["@id": "child"]]]]] +// } +// +// def "Sort one parent with two children"() { +// given: +// jsonLd.getDirectSubclasses("parent") >> ["child1", "child2"] +// jsonLd.getDirectSubclasses("child1") >> [] +// jsonLd.getDirectSubclasses("child2") >> [] +// +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "parent"]], +// ["object": ["@id": "child1"]], +// ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], +// "_children": [["object": ["@id": "child1"]], +// ["object": ["@id": "child2"]]]]] +// } +// +// def "Sort one parent with one child that has one child"() { +// given: +// jsonLd.getDirectSubclasses("parent") >> ["child1"] +// jsonLd.getDirectSubclasses("child1") >> ["child2"] +// jsonLd.getDirectSubclasses("child2") >> [] +// +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "parent"]], +// ["object": ["@id": "child1"]], +// ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], +// "_children": [["object": ["@id": "child1"], +// "_children": [["object": ["@id": "child2"]]]]]]] +// } +// def "One parent, two children"() { given: jsonLd.getDirectSubclasses("root") >> ["child1", "child2"] jsonLd.getDirectSubclasses("child1") >> [] jsonLd.getDirectSubclasses("child2") >> [] + jsonLd.getSuperClasses("child1") >> ["root"] + jsonLd.getSuperClasses("child2") >> ["root"] + jsonLd.getSuperClasses("root") >> [] + expect: def tree = new FacetTree(jsonLd) tree.sortObservationsAsTree(observations) == sorted @@ -92,34 +97,148 @@ class FacetTreeSpec extends Specification { ["object": ["@id": "child2"]]] | [["object": ["@id": "root"], "_children" : [["object": ["@id": "child1"]], ["object": ["@id": "child2"]]]]] } - - def "Three root nodes"() { - given: - jsonLd.getDirectSubclasses(_) >> [] - - expect: - def tree = new FacetTree(jsonLd) - tree.sortObservationsAsTree(observations) == sorted - - where: - observations | sorted - [["object": ["@id": "root1"]], - ["object": ["@id": "root2"]], - ["object": ["@id": "root3"]]] | [["object": ["@id": "root1"]], - ["object": ["@id": "root2"]], - ["object": ["@id": "root3"]]] - } - - def "Children should not be considered parents of themselves"() { - given: - jsonLd.getDirectSubclasses(_) >> [] - - expect: - def tree = new FacetTree(jsonLd) - tree.sortObservationsAsTree(observations) == sorted - - where: - observations | sorted - [["object": ["@id": "A"]], ["object": ["@id": "A"]]] | [["object": ["@id": "A"]], ["object": ["@id": "A"]]] - } +// +// def "Three root nodes"() { +// given: +// jsonLd.getDirectSubclasses(_) >> [] +// +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "root1"]], +// ["object": ["@id": "root2"]], +// ["object": ["@id": "root3"]]] | [["object": ["@id": "root1"]], +// ["object": ["@id": "root2"]], +// ["object": ["@id": "root3"]]] +// } +// +// def "Children should not be considered parents of themselves"() { +// given: +// jsonLd.getDirectSubclasses(_) >> [] +// +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "A"]], ["object": ["@id": "A"]]] | [["object": ["@id": "A"]], ["object": ["@id": "A"]]] +// } + + /// TESTS CHECKING FOR SUPERCLASS IN OBJECT.SUBCLASSOF + +// def "Single observation should return list with one observation"() { +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "parent"]]] | [["object": ["@id": "parent"]]] +// } +// +// def "Sort one parent and one child"() { +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "parent"]], +// ["object": ["@id": "child", "subClassOf" : [["@id": "parent"]]]]] | [["object": ["@id": "parent"], "_children": [["object": ["@id": "child", "subClassOf" : [["@id": "parent"]]]]]]] +// } +// +// def "Sort one parent with two children"() { +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "parent"]], +// ["object": ["@id": "child1", "subClassOf" : [["@id": "parent"]]]], +// ["object": ["@id": "child2", "subClassOf" : [["@id": "parent"]]]]] | [["object": ["@id": "parent"], +// "_children": [["object" : ["@id": "child1", "subClassOf" : [["@id": "parent"]]]], +// ["object" : ["@id": "child2", "subClassOf" : [["@id": "parent"]]]] +// ]]] +// } +// +// def "Sort one parent with one child that has one child"() { +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "parent"]], +// ["object": ["@id": "child1", "subClassOf" : [["@id": "parent"]]]], +// ["object": ["@id": "child2", "subClassOf" : [["@id": "child1"]]]]] | [["object": ["@id": "parent"], +// "_children": [["object": ["@id": "child1", "subClassOf" : [["@id": "parent"]]], +// "_children": [["object": ["@id": "child2", "subClassOf" : [["@id": "child1"]]]]]]]]] +// } +// +// def "One parent, one between, one child"() { +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "parent"]], +// ["object": ["@id": "child1", "subClassOf" : [["@id": "parent"]]]]] | [["object": ["@id": "parent"], +// "_children": [["object": ["@id": "child1", "subClassOf" : [["@id": "parent"]]] +// ]]]] +// } + + +// def "One parent, two children"() { +// given: +// jsonLd.getDirectSubclasses("root") >> ["child1", "child2"] +// jsonLd.getDirectSubclasses("child1") >> [] +// jsonLd.getDirectSubclasses("child2") >> [] +// +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "root"]], +// ["object": ["@id": "child1"]], +// ["object": ["@id": "child2"]]] | [["object": ["@id": "root"], "_children" : [["object": ["@id": "child1"]], +// ["object": ["@id": "child2"]]]]] +// } +// +// def "Three root nodes"() { +// given: +// jsonLd.getDirectSubclasses(_) >> [] +// +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "root1"]], +// ["object": ["@id": "root2"]], +// ["object": ["@id": "root3"]]] | [["object": ["@id": "root1"]], +// ["object": ["@id": "root2"]], +// ["object": ["@id": "root3"]]] +// } +// +// def "Children should not be considered parents of themselves"() { +// given: +// jsonLd.getDirectSubclasses(_) >> [] +// +// expect: +// def tree = new FacetTree(jsonLd) +// tree.sortObservationsAsTree(observations) == sorted +// +// where: +// observations | sorted +// [["object": ["@id": "A"]], ["object": ["@id": "A"]]] | [["object": ["@id": "A"]], ["object": ["@id": "A"]]] +// } } From c85c8101ecaeadf0949684213190db07294358e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Thu, 23 Jan 2025 10:53:47 +0100 Subject: [PATCH 11/19] Fix tests --- .../groovy/whelk/search2/FacetTreeSpec.groovy | 320 +++++++----------- 1 file changed, 113 insertions(+), 207 deletions(-) diff --git a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy index 2e1bb97b73..783b434356 100644 --- a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy @@ -13,69 +13,86 @@ class FacetTreeSpec extends Specification { jsonLd.toTermId(_ as String) >> { String s -> s } } -// def "Single observation should return list with one observation"() { -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "parent"]]] | [["object": ["@id": "parent"]]] -// } -// -// def "Sort one parent and one child"() { -// given: -// jsonLd.getDirectSubclasses("parent") >> ["child"] -// jsonLd.getDirectSubclasses("child") >> [] -// -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "parent"]], -// ["object": ["@id": "child"]]] | [["object": ["@id": "parent"], "_children": [["object": ["@id": "child"]]]]] -// } -// -// def "Sort one parent with two children"() { -// given: -// jsonLd.getDirectSubclasses("parent") >> ["child1", "child2"] -// jsonLd.getDirectSubclasses("child1") >> [] -// jsonLd.getDirectSubclasses("child2") >> [] -// -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "parent"]], -// ["object": ["@id": "child1"]], -// ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], -// "_children": [["object": ["@id": "child1"]], -// ["object": ["@id": "child2"]]]]] -// } -// -// def "Sort one parent with one child that has one child"() { -// given: -// jsonLd.getDirectSubclasses("parent") >> ["child1"] -// jsonLd.getDirectSubclasses("child1") >> ["child2"] -// jsonLd.getDirectSubclasses("child2") >> [] -// -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "parent"]], -// ["object": ["@id": "child1"]], -// ["object": ["@id": "child2"]]] | [["object": ["@id": "parent"], -// "_children": [["object": ["@id": "child1"], -// "_children": [["object": ["@id": "child2"]]]]]]] -// } -// + def "Single observation should return list with one observation"() { + given: + jsonLd.getDirectSubclasses("parent") >> [] + jsonLd.getSuperClasses("parent") >> [] + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "parent"]]] | [["object": ["@id": "parent"]]] + } + + def "Sort one parent and one child"() { + given: + jsonLd.getDirectSubclasses("parent") >> ["child"] + jsonLd.getDirectSubclasses("child") >> [] + + jsonLd.getSuperClasses("child") >> ["parent"] + jsonLd.getSuperClasses("parent") >> [] + + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "parent"]], + ["object": ["@id": "child"]]] | [["object": ["@id": "parent"], "_children": [["object": ["@id": "child"]]]]] + } + + def "Sort one parent with two children, superclasses of root should be ignored"() { + given: + jsonLd.getDirectSubclasses("root") >> ["child1", "child2"] + jsonLd.getDirectSubclasses("child1") >> [] + jsonLd.getDirectSubclasses("child2") >> [] + + jsonLd.getSuperClasses("child1") >> ["root", "Resource"] + jsonLd.getSuperClasses("child2") >> ["child1", "root", "Resource"] + jsonLd.getSuperClasses("root") >> ["Resource"] + + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "root"]], + ["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]] | [["object": ["@id": "root"], + "_children": [["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]]]] + } + + def "Sort one parent with one child that has one child"() { + given: + jsonLd.getDirectSubclasses("root") >> ["child1"] + jsonLd.getDirectSubclasses("child1") >> ["child2"] + jsonLd.getDirectSubclasses("child2") >> [] + + jsonLd.getSuperClasses("child1") >> ["root"] + jsonLd.getSuperClasses("child2") >> ["child1", "root"] + jsonLd.getSuperClasses("root") >> [] + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "root"]], + ["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]] | [["object": ["@id": "root"], + "_children": [["object": ["@id": "child1"], + "_children": [["object": ["@id": "child2"]]]]]]] + } + def "One parent, two children"() { given: jsonLd.getDirectSubclasses("root") >> ["child1", "child2"] @@ -97,148 +114,37 @@ class FacetTreeSpec extends Specification { ["object": ["@id": "child2"]]] | [["object": ["@id": "root"], "_children" : [["object": ["@id": "child1"]], ["object": ["@id": "child2"]]]]] } -// -// def "Three root nodes"() { -// given: -// jsonLd.getDirectSubclasses(_) >> [] -// -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "root1"]], -// ["object": ["@id": "root2"]], -// ["object": ["@id": "root3"]]] | [["object": ["@id": "root1"]], -// ["object": ["@id": "root2"]], -// ["object": ["@id": "root3"]]] -// } -// -// def "Children should not be considered parents of themselves"() { -// given: -// jsonLd.getDirectSubclasses(_) >> [] -// -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "A"]], ["object": ["@id": "A"]]] | [["object": ["@id": "A"]], ["object": ["@id": "A"]]] -// } - - /// TESTS CHECKING FOR SUPERCLASS IN OBJECT.SUBCLASSOF - -// def "Single observation should return list with one observation"() { -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "parent"]]] | [["object": ["@id": "parent"]]] -// } -// -// def "Sort one parent and one child"() { -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "parent"]], -// ["object": ["@id": "child", "subClassOf" : [["@id": "parent"]]]]] | [["object": ["@id": "parent"], "_children": [["object": ["@id": "child", "subClassOf" : [["@id": "parent"]]]]]]] -// } -// -// def "Sort one parent with two children"() { -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "parent"]], -// ["object": ["@id": "child1", "subClassOf" : [["@id": "parent"]]]], -// ["object": ["@id": "child2", "subClassOf" : [["@id": "parent"]]]]] | [["object": ["@id": "parent"], -// "_children": [["object" : ["@id": "child1", "subClassOf" : [["@id": "parent"]]]], -// ["object" : ["@id": "child2", "subClassOf" : [["@id": "parent"]]]] -// ]]] -// } -// -// def "Sort one parent with one child that has one child"() { -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "parent"]], -// ["object": ["@id": "child1", "subClassOf" : [["@id": "parent"]]]], -// ["object": ["@id": "child2", "subClassOf" : [["@id": "child1"]]]]] | [["object": ["@id": "parent"], -// "_children": [["object": ["@id": "child1", "subClassOf" : [["@id": "parent"]]], -// "_children": [["object": ["@id": "child2", "subClassOf" : [["@id": "child1"]]]]]]]]] -// } -// -// def "One parent, one between, one child"() { -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "parent"]], -// ["object": ["@id": "child1", "subClassOf" : [["@id": "parent"]]]]] | [["object": ["@id": "parent"], -// "_children": [["object": ["@id": "child1", "subClassOf" : [["@id": "parent"]]] -// ]]]] -// } - - -// def "One parent, two children"() { -// given: -// jsonLd.getDirectSubclasses("root") >> ["child1", "child2"] -// jsonLd.getDirectSubclasses("child1") >> [] -// jsonLd.getDirectSubclasses("child2") >> [] -// -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "root"]], -// ["object": ["@id": "child1"]], -// ["object": ["@id": "child2"]]] | [["object": ["@id": "root"], "_children" : [["object": ["@id": "child1"]], -// ["object": ["@id": "child2"]]]]] -// } -// -// def "Three root nodes"() { -// given: -// jsonLd.getDirectSubclasses(_) >> [] -// -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "root1"]], -// ["object": ["@id": "root2"]], -// ["object": ["@id": "root3"]]] | [["object": ["@id": "root1"]], -// ["object": ["@id": "root2"]], -// ["object": ["@id": "root3"]]] -// } -// -// def "Children should not be considered parents of themselves"() { -// given: -// jsonLd.getDirectSubclasses(_) >> [] -// -// expect: -// def tree = new FacetTree(jsonLd) -// tree.sortObservationsAsTree(observations) == sorted -// -// where: -// observations | sorted -// [["object": ["@id": "A"]], ["object": ["@id": "A"]]] | [["object": ["@id": "A"]], ["object": ["@id": "A"]]] -// } + + def "Three root nodes"() { + given: + jsonLd.getDirectSubclasses(_) >> [] + jsonLd.getSuperClasses(_) >> [] + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "root1"]], + ["object": ["@id": "root2"]], + ["object": ["@id": "root3"]]] | [["object": ["@id": "root1"]], + ["object": ["@id": "root2"]], + ["object": ["@id": "root3"]]] + } + + def "Children should not be considered parents of themselves"() { + given: + jsonLd.getDirectSubclasses(_) >> [] + jsonLd.getSuperClasses(_) >> [] + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "A"]], ["object": ["@id": "A"]]] | [["object": ["@id": "A"]], ["object": ["@id": "A"]]] + } + } From 964a7149a37846cc7cef01e29e4cd53ae4c0c3f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Thu, 23 Jan 2025 11:13:27 +0100 Subject: [PATCH 12/19] Add test for intermediate observations --- .../groovy/whelk/search2/FacetTreeSpec.groovy | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy index 783b434356..cf34b966ee 100644 --- a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy @@ -147,4 +147,26 @@ class FacetTreeSpec extends Specification { [["object": ["@id": "A"]], ["object": ["@id": "A"]]] | [["object": ["@id": "A"]], ["object": ["@id": "A"]]] } + def "Root with one intermediate observation before one child"() { + given: + jsonLd.getDirectSubclasses("root") >> ["intermediate"] + jsonLd.getDirectSubclasses("intermediate") >> ["child"] + jsonLd.getDirectSubclasses("child") >> [] + + jsonLd.getSuperClasses("child") >> ["intermediate", "root"] + jsonLd.getSuperClasses("root") >> [] + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + // TODO: don't depend on exact form of fake observation + where: + observations | sorted + [["object": ["@id": "root"]], + ["object": ["@id": "child"]]] | [["object": ["@id": "root"], + "_children": [["totalItems" : 0, "view": ["@id" : "fake"], "object": ["@id": "intermediate"], + "_children": [["object": ["@id": "child"]]]]]]] + } + } From e9eba4de07d8c6d723dfccc605cf50f9b913fa67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Thu, 23 Jan 2025 14:18:15 +0100 Subject: [PATCH 13/19] Don't allow duplicates --- whelk-core/src/main/groovy/whelk/search2/FacetTree.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index 451cbf23d8..80714222b7 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -20,7 +20,7 @@ public FacetTree(JsonLd jsonLd) { public List> sortObservationsAsTree(List> observations) { List> tree = new ArrayList<>(); Queue> queue = new ConcurrentLinkedQueue<>(); - List intermediateClasses = new ArrayList<>(); + Set intermediateClasses = new HashSet<>(); observationsAsTypeKeys = observations.stream() .map(o -> jsonLd.toTermKey(get(o, List.of("object", "@id"), ""))) From 23a410892bd4b4e9a284c9cd31f91be2591bc8f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Mon, 27 Jan 2025 14:33:51 +0100 Subject: [PATCH 14/19] Also include the common root, even if it is absent. This avoids mixing multiple roots from different levels in the tree. --- .../main/groovy/whelk/search2/FacetTree.java | 62 +++++++++++++----- .../groovy/whelk/search2/FacetTreeSpec.groovy | 65 ++++++++++++++----- 2 files changed, 95 insertions(+), 32 deletions(-) diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index 80714222b7..20b6fe87ab 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -11,7 +11,7 @@ public class FacetTree { private final JsonLd jsonLd; - private List observationsAsTypeKeys = Collections.emptyList(); + private List observationsAsTypeKeys = new ArrayList<>(); public FacetTree(JsonLd jsonLd) { this.jsonLd = jsonLd; @@ -24,18 +24,36 @@ public List> sortObservationsAsTree(List observationsAsTypeKeys = observations.stream() .map(o -> jsonLd.toTermKey(get(o, List.of("object", "@id"), ""))) - .toList(); + .collect(Collectors.toList()); - observations.forEach(observation -> { - if (isRootNode(observation)) { - tree.add(observation); - queue.add(observation); - } else { - intermediateClasses.addAll(getAbsentSuperClasses(observation)); + List roots = observationsAsTypeKeys.stream().filter(this::isRootNode).toList(); + + String rootKey; + + if (roots.size() == 1) { + Map root = observations.stream() + .filter(o -> jsonLd.toTermKey(get(o, List.of("object", "@id"), "")).equals(roots.getFirst())) + .findFirst() + .orElse(null); + tree.add(root); + queue.add(root); + rootKey = jsonLd.toTermKey(get(root, List.of("object", "@id"), "")); + } else { + rootKey = getAbsentRoot(observationsAsTypeKeys.getFirst()); + observationsAsTypeKeys.add(rootKey); + Map root = createFakeObservation(rootKey); + observations.add(root); + tree.add(root); + queue.add(root); + + } + + observationsAsTypeKeys.forEach(typeKey -> { + if (!typeKey.equals(rootKey)) { + intermediateClasses.addAll(getIntermediateClassesFor(typeKey)); } }); - // Create empty observations for intermediate classes with 0 totalHits observations.addAll(intermediateClasses.stream().map(this::createFakeObservation).toList()); while (!queue.isEmpty()) { @@ -60,8 +78,11 @@ private Map createFakeObservation(String termKey) { return fakeObservation; } - private List getAbsentSuperClasses(Map observation) { - String typeKey = jsonLd.toTermKey(get(observation, List.of("object", "@id"), "")); + private List getIntermediateClassesFor(String typeKey) { + return getAbsentSuperClasses(typeKey); + } + + private List getAbsentSuperClasses(String typeKey) { List allSuperClasses = jsonLd.getSuperClasses(typeKey); return allSuperClasses.stream() @@ -69,16 +90,27 @@ private List getAbsentSuperClasses(Map observation) { .toList(); } - private boolean hasParentInObservations(Map observation) { - String typeKey = jsonLd.toTermKey(get(observation, List.of("object", "@id"), "")); + private String getAbsentRoot(String typeKey) { + List allSuperClasses = jsonLd.getSuperClasses(typeKey); + return allSuperClasses.stream() + .filter(this::subClassesContainsAllObservations) + .findFirst().orElse(null); + } + + private boolean subClassesContainsAllObservations(String c) { + Set subClasses = jsonLd.getSubClasses(c); + return subClasses.containsAll(observationsAsTypeKeys); + } + + private boolean hasParentInObservations(String typeKey) { List allSuperClasses = jsonLd.getSuperClasses(typeKey); return allSuperClasses.stream() .anyMatch(s -> observationsAsTypeKeys.contains(s)); } - private boolean isRootNode(Map observation) { - return !hasParentInObservations(observation); + private boolean isRootNode(String typeKey) { + return !hasParentInObservations(typeKey); } private List> findChildren(Map observation, List> observations) { diff --git a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy index cf34b966ee..7579cdd61b 100644 --- a/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy +++ b/whelk-core/src/test/groovy/whelk/search2/FacetTreeSpec.groovy @@ -49,12 +49,19 @@ class FacetTreeSpec extends Specification { def "Sort one parent with two children, superclasses of root should be ignored"() { given: jsonLd.getDirectSubclasses("root") >> ["child1", "child2"] + jsonLd.getDirectSubclasses("Resource") >> ["root"] jsonLd.getDirectSubclasses("child1") >> [] jsonLd.getDirectSubclasses("child2") >> [] jsonLd.getSuperClasses("child1") >> ["root", "Resource"] jsonLd.getSuperClasses("child2") >> ["child1", "root", "Resource"] jsonLd.getSuperClasses("root") >> ["Resource"] + jsonLd.getSuperClasses("Resource") >> [] + + jsonLd.getSubClasses("Resource") >> ["root", "child1", "child2"] + jsonLd.getSubClasses("root") >> ["child1", "child2"] + jsonLd.getSubClasses("child1") >> [] + jsonLd.getSubClasses("child2") >> [] expect: @@ -117,8 +124,20 @@ class FacetTreeSpec extends Specification { def "Three root nodes"() { given: - jsonLd.getDirectSubclasses(_) >> [] - jsonLd.getSuperClasses(_) >> [] + jsonLd.getDirectSubclasses("absentRoot") >> ["root1", "root2", "root3"] + jsonLd.getDirectSubclasses("root1") >> [] + jsonLd.getDirectSubclasses("root2") >> [] + jsonLd.getDirectSubclasses("root3") >> [] + + jsonLd.getSuperClasses("root1") >> ["absentRoot"] + jsonLd.getSuperClasses("root2") >> ["absentRoot"] + jsonLd.getSuperClasses("root3") >> ["absentRoot"] + jsonLd.getSuperClasses("absentRoot") >> [] + + jsonLd.getSubClasses("absentRoot") >> ["root1", "root2", "root3"] + jsonLd.getSubClasses("root1") >> [] + jsonLd.getSubClasses("root2") >> [] + jsonLd.getSubClasses("root3") >> [] expect: def tree = new FacetTree(jsonLd) @@ -128,24 +147,11 @@ class FacetTreeSpec extends Specification { observations | sorted [["object": ["@id": "root1"]], ["object": ["@id": "root2"]], - ["object": ["@id": "root3"]]] | [["object": ["@id": "root1"]], + ["object": ["@id": "root3"]]] | [["totalItems" : 0, "view": ["@id" : "fake"], "object": ["@id": "absentRoot"], "_children": [["object": ["@id": "root1"]], ["object": ["@id": "root2"]], - ["object": ["@id": "root3"]]] + ["object": ["@id": "root3"]]]]] } - def "Children should not be considered parents of themselves"() { - given: - jsonLd.getDirectSubclasses(_) >> [] - jsonLd.getSuperClasses(_) >> [] - - expect: - def tree = new FacetTree(jsonLd) - tree.sortObservationsAsTree(observations) == sorted - - where: - observations | sorted - [["object": ["@id": "A"]], ["object": ["@id": "A"]]] | [["object": ["@id": "A"]], ["object": ["@id": "A"]]] - } def "Root with one intermediate observation before one child"() { given: @@ -169,4 +175,29 @@ class FacetTreeSpec extends Specification { "_children": [["object": ["@id": "child"]]]]]]] } + def "Absent root, two children"() { + given: + jsonLd.getDirectSubclasses("root") >> ["child1", "child2"] + jsonLd.getDirectSubclasses("child1") >> [] + jsonLd.getDirectSubclasses("child2") >> [] + + jsonLd.getSuperClasses("child1") >> ["root"] + jsonLd.getSuperClasses("child2") >> ["root"] + jsonLd.getSuperClasses("root") >> [] + + jsonLd.getSubClasses("root") >> ["child1", "child2"] + jsonLd.getSubClasses("child1") >> [] + jsonLd.getSubClasses("child2") >> [] + + expect: + def tree = new FacetTree(jsonLd) + tree.sortObservationsAsTree(observations) == sorted + + where: + observations | sorted + [["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]] | [["totalItems" : 0, "view": ["@id" : "fake"], "object": ["@id": "root"], + "_children" : [["object": ["@id": "child1"]], + ["object": ["@id": "child2"]]]]] + } } From 5b820396038a787e4a3f12361db43aaf1d1a541b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Tue, 28 Jan 2025 17:21:52 +0100 Subject: [PATCH 15/19] Cleanup --- .../main/groovy/whelk/search2/FacetTree.java | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index 20b6fe87ab..ce3a505715 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -26,27 +26,24 @@ public List> sortObservationsAsTree(List .map(o -> jsonLd.toTermKey(get(o, List.of("object", "@id"), ""))) .collect(Collectors.toList()); - List roots = observationsAsTypeKeys.stream().filter(this::isRootNode).toList(); - + List rootCandidates = observationsAsTypeKeys.stream().filter(this::isRootNode).toList(); String rootKey; + Map root; - if (roots.size() == 1) { - Map root = observations.stream() - .filter(o -> jsonLd.toTermKey(get(o, List.of("object", "@id"), "")).equals(roots.getFirst())) + if (rootCandidates.size() == 1) { + root = observations.stream() + .filter(o -> jsonLd.toTermKey(get(o, List.of("object", "@id"), "")).equals(rootCandidates.getFirst())) .findFirst() .orElse(null); - tree.add(root); - queue.add(root); rootKey = jsonLd.toTermKey(get(root, List.of("object", "@id"), "")); } else { rootKey = getAbsentRoot(observationsAsTypeKeys.getFirst()); observationsAsTypeKeys.add(rootKey); - Map root = createFakeObservation(rootKey); + root = createFakeObservation(rootKey); observations.add(root); - tree.add(root); - queue.add(root); - } + tree.add(root); + queue.add(root); observationsAsTypeKeys.forEach(typeKey -> { if (!typeKey.equals(rootKey)) { From 72b3bee592a3b890bd2989533a2325b888c802ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Tue, 28 Jan 2025 17:45:01 +0100 Subject: [PATCH 16/19] Add nullcheck --- .../src/main/groovy/whelk/search2/FacetTree.java | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index ce3a505715..f695e22698 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -36,14 +36,19 @@ public List> sortObservationsAsTree(List .findFirst() .orElse(null); rootKey = jsonLd.toTermKey(get(root, List.of("object", "@id"), "")); + tree.add(root); + queue.add(root); } else { rootKey = getAbsentRoot(observationsAsTypeKeys.getFirst()); - observationsAsTypeKeys.add(rootKey); - root = createFakeObservation(rootKey); - observations.add(root); + //TODO: should not be null + if (rootKey != null) { + observationsAsTypeKeys.add(rootKey); + root = createFakeObservation(rootKey); + observations.add(root); + tree.add(root); + queue.add(root); + } } - tree.add(root); - queue.add(root); observationsAsTypeKeys.forEach(typeKey -> { if (!typeKey.equals(rootKey)) { From 3d735b7246b6afabe4bf0b811c4dd45d32f5c354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Tue, 28 Jan 2025 18:09:58 +0100 Subject: [PATCH 17/19] Fix weird NPE --- whelk-core/src/main/groovy/whelk/search2/FacetTree.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index f695e22698..16b85ed813 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -73,6 +73,11 @@ public List> sortObservationsAsTree(List private Map createFakeObservation(String termKey) { Map fakeObservation = new LinkedHashMap<>(); String termId = jsonLd.toTermId(termKey); + if (termId == null) { + // TODO: investigate!! + // Happens when observations are "" and "Dataset". + return new HashMap<>(); + } var fakeObject = Map.of(JsonLd.ID_KEY, termId); fakeObservation.put("totalItems", 0); fakeObservation.put("view", Map.of(JsonLd.ID_KEY, "fake")); From f55b89b703a9390105e5bc89be1e2a018f5453ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Fri, 31 Jan 2025 11:28:16 +0100 Subject: [PATCH 18/19] Refactoring --- .../main/groovy/whelk/search2/FacetTree.java | 52 +++++++++---------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java index 16b85ed813..5916a01b98 100644 --- a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -4,6 +4,7 @@ import java.util.*; import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.function.Function; import java.util.stream.Collectors; import static whelk.util.DocumentUtil.getAtPath; @@ -11,7 +12,7 @@ public class FacetTree { private final JsonLd jsonLd; - private List observationsAsTypeKeys = new ArrayList<>(); + private Map> keyToObservation = new HashMap<>(); public FacetTree(JsonLd jsonLd) { this.jsonLd = jsonLd; @@ -22,39 +23,36 @@ public List> sortObservationsAsTree(List Queue> queue = new ConcurrentLinkedQueue<>(); Set intermediateClasses = new HashSet<>(); - observationsAsTypeKeys = observations.stream() - .map(o -> jsonLd.toTermKey(get(o, List.of("object", "@id"), ""))) - .collect(Collectors.toList()); + keyToObservation = observations.stream() + .collect(Collectors.toMap(o -> jsonLd.toTermKey(get(o, List.of("object", "@id"), "")), Function.identity())); - List rootCandidates = observationsAsTypeKeys.stream().filter(this::isRootNode).toList(); - String rootKey; - Map root; + List rootCandidates = keyToObservation.keySet().stream().filter(this::isRootNode).toList(); + String rootKey = ""; if (rootCandidates.size() == 1) { - root = observations.stream() - .filter(o -> jsonLd.toTermKey(get(o, List.of("object", "@id"), "")).equals(rootCandidates.getFirst())) - .findFirst() - .orElse(null); - rootKey = jsonLd.toTermKey(get(root, List.of("object", "@id"), "")); + rootKey = rootCandidates.getFirst(); + var root = keyToObservation.get(rootKey); tree.add(root); queue.add(root); } else { - rootKey = getAbsentRoot(observationsAsTypeKeys.getFirst()); - //TODO: should not be null - if (rootKey != null) { - observationsAsTypeKeys.add(rootKey); - root = createFakeObservation(rootKey); - observations.add(root); - tree.add(root); - queue.add(root); + Optional first = keyToObservation.keySet().stream().findFirst(); + if (first.isPresent()) { + Optional rootKeyOpt = getAbsentRoot(first.get()); + if (rootKeyOpt.isPresent()) { + rootKey = rootKeyOpt.get(); + var root = createFakeObservation(rootKey); + observations.add(root); + tree.add(root); + queue.add(root); + } } } - observationsAsTypeKeys.forEach(typeKey -> { + for (String typeKey : keyToObservation.keySet()) { if (!typeKey.equals(rootKey)) { intermediateClasses.addAll(getIntermediateClassesFor(typeKey)); } - }); + } observations.addAll(intermediateClasses.stream().map(this::createFakeObservation).toList()); @@ -93,27 +91,27 @@ private List getAbsentSuperClasses(String typeKey) { List allSuperClasses = jsonLd.getSuperClasses(typeKey); return allSuperClasses.stream() - .takeWhile(s -> !observationsAsTypeKeys.contains(s)) + .takeWhile(s -> !keyToObservation.containsKey(s)) .toList(); } - private String getAbsentRoot(String typeKey) { + private Optional getAbsentRoot(String typeKey) { List allSuperClasses = jsonLd.getSuperClasses(typeKey); return allSuperClasses.stream() .filter(this::subClassesContainsAllObservations) - .findFirst().orElse(null); + .findFirst(); } private boolean subClassesContainsAllObservations(String c) { Set subClasses = jsonLd.getSubClasses(c); - return subClasses.containsAll(observationsAsTypeKeys); + return subClasses.containsAll(keyToObservation.keySet()); } private boolean hasParentInObservations(String typeKey) { List allSuperClasses = jsonLd.getSuperClasses(typeKey); return allSuperClasses.stream() - .anyMatch(s -> observationsAsTypeKeys.contains(s)); + .anyMatch(s -> keyToObservation.containsKey(s)); } private boolean isRootNode(String typeKey) { From 85a0a17eb785d8055c7c5293facfdec669382ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lars=20Rosenstr=C3=B6m?= Date: Tue, 3 Jun 2025 11:06:12 +0200 Subject: [PATCH 19/19] Adjustments after rebase --- .../src/main/groovy/whelk/search2/Query.java | 5 +- .../src/main/groovy/whelk/search2/Stats.java | 319 ------------------ 2 files changed, 3 insertions(+), 321 deletions(-) delete mode 100644 whelk-core/src/main/groovy/whelk/search2/Stats.java diff --git a/whelk-core/src/main/groovy/whelk/search2/Query.java b/whelk-core/src/main/groovy/whelk/search2/Query.java index ecbf139ec9..bf960f8233 100644 --- a/whelk-core/src/main/groovy/whelk/search2/Query.java +++ b/whelk-core/src/main/groovy/whelk/search2/Query.java @@ -463,8 +463,9 @@ private Map buildSliceByDimension(Map(); var observations = getObservations(propertyKey, buckets); - if (property.name().equals(Disambiguate.RDF_TYPE)) { - observations = new FacetTree(jsonLd).sortObservationsAsTree(observations); + + if (property.name().equals(JsonLd.Rdfs.RDF_TYPE)) { + observations = new FacetTree(whelk.getJsonld()).sortObservationsAsTree(observations); } if (!observations.isEmpty()) { if (selectedFilters.isRangeFilter(propertyKey)) { diff --git a/whelk-core/src/main/groovy/whelk/search2/Stats.java b/whelk-core/src/main/groovy/whelk/search2/Stats.java deleted file mode 100644 index be9118d4dc..0000000000 --- a/whelk-core/src/main/groovy/whelk/search2/Stats.java +++ /dev/null @@ -1,319 +0,0 @@ -package whelk.search2; - -import whelk.JsonLd; -import whelk.search2.querytree.FreeText; -import whelk.search2.querytree.Link; -import whelk.search2.querytree.Literal; -import whelk.search2.querytree.Node; -import whelk.search2.querytree.PathValue; -import whelk.search2.querytree.Property; -import whelk.search2.querytree.PropertyValue; -import whelk.search2.querytree.QueryTree; -import whelk.search2.querytree.Value; -import whelk.search2.querytree.VocabTerm; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.stream.Collectors; - -import static whelk.search2.QueryUtil.makeFindUrl; -import static whelk.search2.QueryUtil.makeParams; - -public class Stats { - private final Disambiguate disambiguate; - private final QueryResult queryResult; - private final QueryParams queryParams; - private final AppParams appParams; - private final QueryTree queryTree; - private final QueryUtil queryUtil; - private final JsonLd jsonLd; - - public Stats(Disambiguate disambiguate, - QueryUtil queryUtil, - QueryTree queryTree, - QueryResult queryResult, - QueryParams queryParams, - AppParams appParams, - JsonLd jsonLd - ) { - this.disambiguate = disambiguate; - this.queryResult = queryResult; - this.queryParams = queryParams; - this.appParams = appParams; - this.queryTree = queryTree; - this.queryUtil = queryUtil; - this.jsonLd = jsonLd; - } - - public Map build() { - var sliceByDimension = getSliceByDimension(); - var boolFilters = getBoolFilters(); - // TODO: Use Multi search API instead of doing two elastic roundtrips? - // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-multi-search.html - var predicates = getCuratedPredicateLinks(); - return Map.of(JsonLd.ID_KEY, "#stats", - "sliceByDimension", sliceByDimension, - "_boolFilters", boolFilters, - "_predicates", predicates); - } - - private Map getSliceByDimension() { - var buckets = collectBuckets(); - var rangeProps = appParams.statsRepr.getRangeProperties(); - return buildSliceByDimension(buckets, rangeProps); - } - - // Problem: Same value in different fields will be counted twice, e.g. contribution.agent + instanceOf.contribution.agent - private Map> collectBuckets() { - Map sliceParamsByProperty = appParams.statsRepr.getSliceByProperty(); - Map> propertyNameToBucketCounts = new LinkedHashMap<>(); - - // TODO: Decide how to handle properties that can appear at both instance and work level. - // Probably not the best idea to just add the counts together like we do now, since it's both inconvenient - // and not guaranteed to produce a correct number. - for (var agg : queryResult.aggs) { - for (var b : agg.buckets()) { - var buckets = propertyNameToBucketCounts.computeIfAbsent(agg.property(), x -> new HashMap<>()); - buckets.compute(b.value(), (k, v) -> v == null ? b.count() : v + b.count()); - } - } - - Map> propertyToBucketCounts = new LinkedHashMap<>(); - - for (Property property : sliceParamsByProperty.keySet()) { - var buckets = propertyNameToBucketCounts.get(property.name()); - if (buckets != null) { - int maxBuckets = sliceParamsByProperty.get(property).size(); - Map newBuckets = new LinkedHashMap<>(); - buckets.entrySet() - .stream() - .sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())) - .limit(Math.min(maxBuckets, buckets.size())) - .forEach(entry -> { - String bucketKey = entry.getKey(); - int count = entry.getValue(); - Value v; - if (property.hasVocabValue()) { - v = new VocabTerm(bucketKey, disambiguate.getDefinition(bucketKey)); - } else if (property.isObjectProperty()) { - v = new Link(bucketKey, disambiguate.getChip(bucketKey)); - } else { - v = new Literal(bucketKey); - } - newBuckets.put(new PropertyValue(property, Operator.EQUALS, v), count); - }); - propertyToBucketCounts.put(property, newBuckets); - } - } - - return propertyToBucketCounts; - } - - private Map buildSliceByDimension(Map> propToBuckets, Set rangeProps) { - Map nonQueryParams = queryParams.getNonQueryParams(0); - Map sliceParamsByProperty = appParams.statsRepr.getSliceByProperty(); - - Map sliceByDimension = new LinkedHashMap<>(); - - propToBuckets.forEach((property, buckets) -> { - var sliceNode = new LinkedHashMap<>(); - var isRange = rangeProps.contains(property); - var observations = getObservations(buckets, isRange ? queryTree.removeTopLevelPropValueWithRangeIfPropEquals(property) : queryTree, nonQueryParams); - - if (property.isType()) { - observations = new FacetTree(jsonLd).sortObservationsAsTree(observations); - } - if (!observations.isEmpty()) { - if (isRange) { - sliceNode.put("search", getRangeTemplate(property, makeParams(nonQueryParams))); - } - sliceNode.put("dimension", property.name()); - sliceNode.put("observation", observations); - sliceNode.put("maxItems", sliceParamsByProperty.get(property).size()); - sliceByDimension.put(property.name(), sliceNode); - } - }); - - return sliceByDimension; - } - - private List> getObservations(Map buckets, QueryTree qt, Map nonQueryParams) { - List> observations = new ArrayList<>(); - - buckets.forEach((pv, count) -> { - Map observation = new LinkedHashMap<>(); - boolean queried = qt.getTopLevelPvNodes().contains(pv) - || pv.value().string().equals(nonQueryParams.get(QueryParams.ApiParams.OBJECT)); - if (!queried) { - observation.put("totalItems", count); - var url = makeFindUrl(qt.addToTopLevel(pv), nonQueryParams); - observation.put("view", Map.of(JsonLd.ID_KEY, url)); - observation.put("object", pv.value().description()); - observations.add(observation); - } - }); - - return observations; - } - - private Map getRangeTemplate(Property property, List nonQueryParams) { - var GtLtNodes = queryTree.getTopLevelPvNodes().stream() - .filter(pv -> pv.property().equals(property)) - .filter(pv -> switch (pv.operator()) { - case EQUALS, NOT_EQUALS -> false; - case GREATER_THAN_OR_EQUALS, GREATER_THAN, LESS_THAN_OR_EQUALS, LESS_THAN -> true; - }) - .filter(pv -> pv.value().isNumeric()) - .toList(); - - String min = null; - String max = null; - - for (var pv : GtLtNodes) { - var orEquals = pv.toOrEquals(); - if (orEquals.operator() == Operator.GREATER_THAN_OR_EQUALS && min == null) { - min = orEquals.value().string(); - } else if (orEquals.operator() == Operator.LESS_THAN_OR_EQUALS && max == null) { - max = orEquals.value().string(); - } else { - // Not a proper range, reset and abort - min = null; - max = null; - break; - } - } - - var tree = queryTree.removeTopLevelPropValueIfPropEquals(property); - - Map template = new LinkedHashMap<>(); - - var placeholderNode = new FreeText(Operator.EQUALS, String.format("{?%s}", property.name())); - var templateQueryString = tree.addToTopLevel(placeholderNode).toString(); - var templateUrl = makeFindUrl(tree.getTopLevelFreeText(), templateQueryString, nonQueryParams); - template.put("template", templateUrl); - - var mapping = new LinkedHashMap<>(); - mapping.put("variable", property.name()); - mapping.put(Operator.GREATER_THAN_OR_EQUALS.termKey, Objects.toString(min, "")); - mapping.put(Operator.LESS_THAN_OR_EQUALS.termKey, Objects.toString(max, "")); - template.put("mapping", mapping); - - return template; - } - - private List> getBoolFilters() { - var nonQueryParams = queryParams.getNonQueryParams(0); - List> results = new ArrayList<>(); - var existing = queryTree.getTopLevelNodes(); - - for (var f : appParams.siteFilters.optionalFilters()) { - QueryTree newTree; - boolean isSelected; - Node filter = f.getAlias().orElse(f.getExplicit()); - - if (existing.contains(filter)) { - newTree = queryTree.removeTopLevelNode(filter); - isSelected = true; - } else { - newTree = queryTree.addToTopLevel(filter); - isSelected = false; - } - - Map res = new LinkedHashMap<>(); - // TODO: fix form - res.put("totalItems", 0); - res.put("object", Map.of(JsonLd.TYPE_KEY, "Resource", "prefLabelByLang", f.getPrefLabelByLang())); - res.put("view", Map.of(JsonLd.ID_KEY, makeFindUrl(newTree, nonQueryParams))); - res.put("_selected", isSelected); - - results.add(res); - } - - return results; - } - - // TODO naming things "curated predicate links" ?? - private List> getCuratedPredicateLinks() { - var o = getObject(queryParams); - if (o == null) { - return Collections.emptyList(); - } - var curatedPredicates = curatedPredicates(o, appParams.relationFilters); - if (curatedPredicates.isEmpty()) { - return Collections.emptyList(); - } - QueryResult queryRes = new QueryResult(queryUtil.query(getCuratedPredicateEsQueryDsl(o, curatedPredicates))); - return predicateLinks(queryRes.pAggs, o, queryParams.getNonQueryParams(0)); - } - - private List curatedPredicates(Entity object, Map> relationFilters) { - return object.superclassesIncludingSelf() - .stream() - .filter(relationFilters::containsKey) - .findFirst().map(relationFilters::get) - .orElse(Collections.emptyList()); - } - - private Map getCuratedPredicateEsQueryDsl(Entity o, List curatedPredicates) { - var queryDsl = new LinkedHashMap(); - queryDsl.put("query", new PathValue("_links", Operator.EQUALS, o.id()).toEs()); - queryDsl.put("size", 0); - queryDsl.put("from", 0); - queryDsl.put("aggs", Aggs.buildPAggQuery(o, curatedPredicates, disambiguate)); - queryDsl.put("track_total_hits", true); - return queryDsl; - } - - private List> predicateLinks(List aggs, Entity object, - Map nonQueryParams) { - var result = new ArrayList>(); - Set selected = new HashSet<>(); - if (nonQueryParams.containsKey(QueryParams.ApiParams.PREDICATES)) { - selected.add(nonQueryParams.get(QueryParams.ApiParams.PREDICATES)); - } - - Map counts = aggs.stream().collect(Collectors.toMap(Aggs.Bucket::value, Aggs.Bucket::count)); - - for (String p : curatedPredicates(object, appParams.relationFilters)) { - if (!counts.containsKey(p)) { - continue; - } - - int count = counts.get(p); - - if (count > 0) { - Map params = new HashMap<>(nonQueryParams); - params.put(QueryParams.ApiParams.PREDICATES, p); - result.add(Map.of( - "totalItems", count, - "view", Map.of(JsonLd.ID_KEY, makeFindUrl(makeParams(params))), - "object", disambiguate.getDefinition(p), - "_selected", selected.contains(p) - )); - } - } - - return result; - } - - private Entity getObject(QueryParams queryParams) { - var object = queryParams.object; - if (object != null) { - return queryUtil.loadThing(object) - .map(thing -> thing.getOrDefault(JsonLd.TYPE_KEY, null)) - .map(JsonLd::asList) - .map(List::getFirst) - .map(type -> new Entity(object, (String) type, disambiguate.getSuperclasses((String) type))) - .orElse(null); - } - return null; - } -}