diff --git a/whelk-core/src/main/groovy/whelk/JsonLd.groovy b/whelk-core/src/main/groovy/whelk/JsonLd.groovy index b254357dc5..f4623f3b60 100644 --- a/whelk-core/src/main/groovy/whelk/JsonLd.groovy +++ b/whelk-core/src/main/groovy/whelk/JsonLd.groovy @@ -670,12 +670,6 @@ class JsonLd { //==== Class-hierarchies ==== - List getSuperClasses(String type) { - List res = [] - getSuperClasses(type, res) - return res - } - void getSuperClasses(String type, List result) { def termMap = vocabIndex[type] if (termMap == null) @@ -695,6 +689,13 @@ class JsonLd { } } + // Returns all superclasses in an ordered List of typeKeys + List getSuperClasses(String type) { + List allSuperClasses = new ArrayList<>() + getSuperClasses(type, allSuperClasses) + return allSuperClasses + } + private Map> generateSubTermLists(String relationToSuper) { Map> superTermOf = [:] for (String type : vocabIndex.keySet()) { @@ -730,6 +731,10 @@ class JsonLd { return type in bases } + List getDirectSubclasses(String type) { + return superClassOf.get(type) ?: [] + } + boolean isInstanceOf(Map entity, String baseType) { if (entity.is(null)) { return false diff --git a/whelk-core/src/main/groovy/whelk/search2/FacetTree.java b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java new file mode 100644 index 0000000000..5916a01b98 --- /dev/null +++ b/whelk-core/src/main/groovy/whelk/search2/FacetTree.java @@ -0,0 +1,138 @@ +package whelk.search2; + +import whelk.JsonLd; + +import java.util.*; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.function.Function; +import java.util.stream.Collectors; + +import static whelk.util.DocumentUtil.getAtPath; + +public class FacetTree { + + private final JsonLd jsonLd; + private Map> keyToObservation = new HashMap<>(); + + public FacetTree(JsonLd jsonLd) { + this.jsonLd = jsonLd; + } + + public List> sortObservationsAsTree(List> observations) { + List> tree = new ArrayList<>(); + Queue> queue = new ConcurrentLinkedQueue<>(); + Set intermediateClasses 
= new HashSet<>(); + + keyToObservation = observations.stream() + .collect(Collectors.toMap(o -> jsonLd.toTermKey(get(o, List.of("object", "@id"), "")), Function.identity())); + + List rootCandidates = keyToObservation.keySet().stream().filter(this::isRootNode).toList(); + String rootKey = ""; + + if (rootCandidates.size() == 1) { + rootKey = rootCandidates.getFirst(); + var root = keyToObservation.get(rootKey); + tree.add(root); + queue.add(root); + } else { + Optional first = keyToObservation.keySet().stream().findFirst(); + if (first.isPresent()) { + Optional rootKeyOpt = getAbsentRoot(first.get()); + if (rootKeyOpt.isPresent()) { + rootKey = rootKeyOpt.get(); + var root = createFakeObservation(rootKey); + observations.add(root); + tree.add(root); + queue.add(root); + } + } + } + + for (String typeKey : keyToObservation.keySet()) { + if (!typeKey.equals(rootKey)) { + intermediateClasses.addAll(getIntermediateClassesFor(typeKey)); + } + } + + observations.addAll(intermediateClasses.stream().map(this::createFakeObservation).toList()); + + while (!queue.isEmpty()) { + var observation = queue.remove(); + var children = findChildren(observation, observations); + + if (!children.isEmpty()) { + queue.addAll(children); + observation.put("_children", children); + } + } + return List.copyOf(tree); + } + + private Map createFakeObservation(String termKey) { + Map fakeObservation = new LinkedHashMap<>(); + String termId = jsonLd.toTermId(termKey); + if (termId == null) { + // TODO: investigate!! + // Happens when observations are "" and "Dataset". 
+ return new HashMap<>(); + } + var fakeObject = Map.of(JsonLd.ID_KEY, termId); + fakeObservation.put("totalItems", 0); + fakeObservation.put("view", Map.of(JsonLd.ID_KEY, "fake")); + fakeObservation.put("object", fakeObject); + return fakeObservation; + } + + private List getIntermediateClassesFor(String typeKey) { + return getAbsentSuperClasses(typeKey); + } + + private List getAbsentSuperClasses(String typeKey) { + List allSuperClasses = jsonLd.getSuperClasses(typeKey); + + return allSuperClasses.stream() + .takeWhile(s -> !keyToObservation.containsKey(s)) + .toList(); + } + + private Optional getAbsentRoot(String typeKey) { + List allSuperClasses = jsonLd.getSuperClasses(typeKey); + return allSuperClasses.stream() + .filter(this::subClassesContainsAllObservations) + .findFirst(); + } + + private boolean subClassesContainsAllObservations(String c) { + Set subClasses = jsonLd.getSubClasses(c); + return subClasses.containsAll(keyToObservation.keySet()); + } + + private boolean hasParentInObservations(String typeKey) { + List allSuperClasses = jsonLd.getSuperClasses(typeKey); + + return allSuperClasses.stream() + .anyMatch(s -> keyToObservation.containsKey(s)); + } + + private boolean isRootNode(String typeKey) { + return !hasParentInObservations(typeKey); + } + + private List> findChildren(Map observation, List> observations) { + return observations.stream() + .filter(o -> isDirectSubclass(o, observation)) + .collect(Collectors.toList()); + } + + private boolean isDirectSubclass(Map obsA, Map obsB) { + String idA = jsonLd.toTermKey(get(obsA, List.of("object", "@id"), "")); + String idB = jsonLd.toTermKey(get(obsB, List.of("object", "@id"), "")); + List directSubclasses = jsonLd.getDirectSubclasses(idB); + return directSubclasses.contains(idA); + } + + @SuppressWarnings("unchecked") + private static T get(Object m, List path, T defaultTo) { + return (T) getAtPath(m, path, defaultTo); + } +} diff --git a/whelk-core/src/main/groovy/whelk/search2/Query.java 
/**
 * Specification for {@link FacetTree#sortObservationsAsTree}.
 *
 * JsonLd is mocked: toTermKey and toTermId are identity functions, and each feature stubs
 * only the hierarchy lookups (getDirectSubclasses, getSuperClasses, getSubClasses) it needs.
 * Observations are reduced to their "object/@id"; fake observations additionally carry
 * "totalItems" 0 and a "view" with "@id" "fake".
 */
class FacetTreeSpec extends Specification {

    JsonLd jsonLd

    void setup() {
        jsonLd = GroovyMock(JsonLd.class)
        jsonLd.toTermKey(_ as String) >> { String s -> s }
        jsonLd.toTermId(_ as String) >> { String s -> s }
    }

    def "Single observation should return list with one observation"() {
        given:
        jsonLd.getDirectSubclasses("parent") >> []
        jsonLd.getSuperClasses("parent") >> []

        expect:
        def tree = new FacetTree(jsonLd)
        tree.sortObservationsAsTree(observations) == sorted

        where:
        observations                    | sorted
        [["object": ["@id": "parent"]]] | [["object": ["@id": "parent"]]]
    }

    def "Sort one parent and one child"() {
        given:
        jsonLd.getDirectSubclasses("parent") >> ["child"]
        jsonLd.getDirectSubclasses("child") >> []

        jsonLd.getSuperClasses("child") >> ["parent"]
        jsonLd.getSuperClasses("parent") >> []

        expect:
        def tree = new FacetTree(jsonLd)
        tree.sortObservationsAsTree(observations) == sorted

        where:
        observations                    | sorted
        [["object": ["@id": "parent"]],
         ["object": ["@id": "child"]]]  | [["object": ["@id": "parent"], "_children": [["object": ["@id": "child"]]]]]
    }

    def "Sort one parent with two children, superclasses of root should be ignored"() {
        given:
        jsonLd.getDirectSubclasses("root") >> ["child1", "child2"]
        jsonLd.getDirectSubclasses("Resource") >> ["root"]
        jsonLd.getDirectSubclasses("child1") >> []
        jsonLd.getDirectSubclasses("child2") >> []

        // child1 and child2 are siblings directly below root; "Resource" lies above the
        // observed root and must not show up in the result.
        jsonLd.getSuperClasses("child1") >> ["root", "Resource"]
        jsonLd.getSuperClasses("child2") >> ["root", "Resource"]
        jsonLd.getSuperClasses("root") >> ["Resource"]
        jsonLd.getSuperClasses("Resource") >> []

        jsonLd.getSubClasses("Resource") >> ["root", "child1", "child2"]
        jsonLd.getSubClasses("root") >> ["child1", "child2"]
        jsonLd.getSubClasses("child1") >> []
        jsonLd.getSubClasses("child2") >> []

        expect:
        def tree = new FacetTree(jsonLd)
        tree.sortObservationsAsTree(observations) == sorted

        where:
        observations                    | sorted
        [["object": ["@id": "root"]],
         ["object": ["@id": "child1"]],
         ["object": ["@id": "child2"]]] | [["object": ["@id": "root"],
                                            "_children": [["object": ["@id": "child1"]],
                                                          ["object": ["@id": "child2"]]]]]
    }

    def "Sort one parent with one child that has one child"() {
        given:
        jsonLd.getDirectSubclasses("root") >> ["child1"]
        jsonLd.getDirectSubclasses("child1") >> ["child2"]
        jsonLd.getDirectSubclasses("child2") >> []

        jsonLd.getSuperClasses("child1") >> ["root"]
        jsonLd.getSuperClasses("child2") >> ["child1", "root"]
        jsonLd.getSuperClasses("root") >> []

        expect:
        def tree = new FacetTree(jsonLd)
        tree.sortObservationsAsTree(observations) == sorted

        where:
        observations                    | sorted
        [["object": ["@id": "root"]],
         ["object": ["@id": "child1"]],
         ["object": ["@id": "child2"]]] | [["object": ["@id": "root"],
                                            "_children": [["object": ["@id": "child1"],
                                                           "_children": [["object": ["@id": "child2"]]]]]]]
    }

    def "One parent, two children"() {
        given:
        jsonLd.getDirectSubclasses("root") >> ["child1", "child2"]
        jsonLd.getDirectSubclasses("child1") >> []
        jsonLd.getDirectSubclasses("child2") >> []

        jsonLd.getSuperClasses("child1") >> ["root"]
        jsonLd.getSuperClasses("child2") >> ["root"]
        jsonLd.getSuperClasses("root") >> []

        expect:
        def tree = new FacetTree(jsonLd)
        tree.sortObservationsAsTree(observations) == sorted

        where:
        observations                    | sorted
        [["object": ["@id": "root"]],
         ["object": ["@id": "child1"]],
         ["object": ["@id": "child2"]]] | [["object": ["@id": "root"], "_children" : [["object": ["@id": "child1"]],
                                                                                      ["object": ["@id": "child2"]]]]]
    }

    def "Three root nodes"() {
        given:
        jsonLd.getDirectSubclasses("absentRoot") >> ["root1", "root2", "root3"]
        jsonLd.getDirectSubclasses("root1") >> []
        jsonLd.getDirectSubclasses("root2") >> []
        jsonLd.getDirectSubclasses("root3") >> []

        jsonLd.getSuperClasses("root1") >> ["absentRoot"]
        jsonLd.getSuperClasses("root2") >> ["absentRoot"]
        jsonLd.getSuperClasses("root3") >> ["absentRoot"]
        jsonLd.getSuperClasses("absentRoot") >> []

        jsonLd.getSubClasses("absentRoot") >> ["root1", "root2", "root3"]
        jsonLd.getSubClasses("root1") >> []
        jsonLd.getSubClasses("root2") >> []
        jsonLd.getSubClasses("root3") >> []

        expect:
        def tree = new FacetTree(jsonLd)
        tree.sortObservationsAsTree(observations) == sorted

        // The common, unobserved superclass is expected as a fake root observation.
        where:
        observations                    | sorted
        [["object": ["@id": "root1"]],
         ["object": ["@id": "root2"]],
         ["object": ["@id": "root3"]]]  | [["totalItems" : 0, "view": ["@id" : "fake"], "object": ["@id": "absentRoot"], "_children": [["object": ["@id": "root1"]],
                                                                                                                                        ["object": ["@id": "root2"]],
                                                                                                                                        ["object": ["@id": "root3"]]]]]
    }

    def "Root with one intermediate observation before one child"() {
        given:
        jsonLd.getDirectSubclasses("root") >> ["intermediate"]
        jsonLd.getDirectSubclasses("intermediate") >> ["child"]
        jsonLd.getDirectSubclasses("child") >> []

        jsonLd.getSuperClasses("child") >> ["intermediate", "root"]
        jsonLd.getSuperClasses("root") >> []

        expect:
        def tree = new FacetTree(jsonLd)
        tree.sortObservationsAsTree(observations) == sorted

        // TODO: don't depend on exact form of fake observation
        where:
        observations                    | sorted
        [["object": ["@id": "root"]],
         ["object": ["@id": "child"]]]  | [["object": ["@id": "root"],
                                            "_children": [["totalItems" : 0, "view": ["@id" : "fake"], "object": ["@id": "intermediate"],
                                                           "_children": [["object": ["@id": "child"]]]]]]]
    }

    def "Absent root, two children"() {
        given:
        jsonLd.getDirectSubclasses("root") >> ["child1", "child2"]
        jsonLd.getDirectSubclasses("child1") >> []
        jsonLd.getDirectSubclasses("child2") >> []

        jsonLd.getSuperClasses("child1") >> ["root"]
        jsonLd.getSuperClasses("child2") >> ["root"]
        jsonLd.getSuperClasses("root") >> []

        jsonLd.getSubClasses("root") >> ["child1", "child2"]
        jsonLd.getSubClasses("child1") >> []
        jsonLd.getSubClasses("child2") >> []

        expect:
        def tree = new FacetTree(jsonLd)
        tree.sortObservationsAsTree(observations) == sorted

        where:
        observations                    | sorted
        [["object": ["@id": "child1"]],
         ["object": ["@id": "child2"]]] | [["totalItems" : 0, "view": ["@id" : "fake"], "object": ["@id": "root"],
                                            "_children" : [["object": ["@id": "child1"]],
                                                           ["object": ["@id": "child2"]]]]]
    }
}