From 962ed16544d8da886a0082d1f7549bb27126e19d Mon Sep 17 00:00:00 2001
From: Jannis Mohlin Tsiroyannis <jannis.mohlintsiroyannis@kb.se>
Date: Wed, 2 Nov 2022 10:45:31 +0100
Subject: [PATCH 1/9] WIP work merging.

---
 .../whelk/importer/DatasetImporter.groovy     |   4 +
 .../src/main/groovy/whelk/Document.groovy     |  10 ++
 .../src/main/groovy/whelk/WorkMerging.java    | 138 ++++++++++++++++++
 3 files changed, 152 insertions(+)
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging.java

diff --git a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
index c966ce8e3a..c9cfe318cc 100644
--- a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
+++ b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
@@ -58,6 +58,10 @@ class DatasetImporter {
     Map<String, String> aliasMap = [:]
 
     DatasetImporter(Whelk whelk, String datasetUri, Map flags=[:], String datasetDescPath=null) {
+        System.err.println("WARNING: Do not ever cancel an ongoing dataset loading operation using CTRL+C (or equivalent).\n"+
+        "Doing so poses a danger because dataset loading may under some circumstances need to perform\n"+
+        "operations that are not atomic, and loss of data can occur if the process is interrupted during\n"+
+        "such an operation.")
         this.whelk = whelk
         this.datasetUri = datasetUri
         if (datasetDescPath != null) {
diff --git a/whelk-core/src/main/groovy/whelk/Document.groovy b/whelk-core/src/main/groovy/whelk/Document.groovy
index 0580d6de04..3c1932e986 100644
--- a/whelk-core/src/main/groovy/whelk/Document.groovy
+++ b/whelk-core/src/main/groovy/whelk/Document.groovy
@@ -20,6 +20,10 @@ import static whelk.util.Jackson.mapper
  * A document is represented as a data Map (containing Maps, Lists and Value objects).
  *
  * This class serves as a wrapper around such a map, with access methods for specific parts of the data.
+ *
+ * TODO:
+ * Many of the accessors of this class assumes the underlying data is an INSTANCE. We may want to break that
+ * assumption up at some point down the line, or check that it is actually the case in the accessors themselves.
  */
 @Log
 class Document {
@@ -51,6 +55,7 @@ class Document {
     static final List thingInSchemePath = ["@graph",1,"inScheme","@id"]
     static final List recordIdPath = ["@graph", 0, "@id"]
     static final List workIdPath = ["@graph", 1, "instanceOf", "@id"]
+    static final List workPath = ["@graph", 1, "instanceOf"]
     static final List thingMetaPath = ["@graph", 1, "meta", "@id"]
     static final List recordSameAsPath = ["@graph", 0, "sameAs"]
     static final List recordTypedIDsPath = ["@graph", 0, "identifiedBy"]
@@ -177,6 +182,11 @@ class Document {
 
     void setThingMeta(meta) { set(thingMetaPath, meta) }
 
+    Map getWorkEntity() { return get(workPath) }
+
+    void setWorkEntity(work) { set(workPath, work) }
+
+
     /**
      * Will have base URI prepended if not already there
      */
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging.java b/whelk-core/src/main/groovy/whelk/WorkMerging.java
new file mode 100644
index 0000000000..3747160792
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging.java
@@ -0,0 +1,138 @@
+package whelk;
+
+import whelk.component.PostgreSQLComponent;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class WorkMerging {
+
+    /**
+     * Merge the works of all listed instances into one. The listed instances
+     * may or may not have external works already. Orphaned work records will be
+     * deleted.
+     *
+     * This is _not_ one atomic operation, but rather a series of operations.
+     * This means that it is possible to observe the process halfway though from the
+     * outside. It also means that should the process be stopped halfway through,
+     * results may look odd (but will still obey basic data integrity rules).
+     *
+     * In the worst case scenario, if the process is interrupted just after the orphans
+     * have been deleted, but their sameAs-uris have not yet been moved to the merged
+     * work, those sameAs-uris will be lost. This risk cannot be avoided without compromising
+     * the URI integrity checks of the underlying code (two records are never allowed to
+     * have the same URI at the same time).
+     *
+     * Returns the URI of the one remaining (or new) work that all of the instances
+     * now link to.
+     */
+    public static String mergeWorksOf(List<String> instanceIDs, Whelk whelk) {
+
+        List<Document> instances = collectInstancesOfThisWork(instanceIDs, whelk);
+
+        Document baseWork = selectBaseWork(instances, whelk);
+        String baseWorkUri = baseWork.getThingIdentifiers().get(0);
+
+        // Relink the instances and collect all work aliases
+        Map linkEntity = new HashMap();
+        linkEntity.put("@id", baseWorkUri);
+        List<String> workAlternateUris = new ArrayList<>();
+        for (Document instance : instances) {
+            workAlternateUris.addAll( instance.getThingIdentifiers() );
+            if (!instance.getWorkEntity().equals(linkEntity)) { // If not already linked to the correct record
+                whelk.storeAtomicUpdate(instance.getShortId(), true, false, true, "xl", null, (Document doc) -> {
+                    doc.setWorkEntity(linkEntity);
+                });
+            }
+        }
+
+        // Merge other works into the baseWork. This must be done first, before any orphans can be deleted,
+        // or we risk loosing data if the process is interrupted.
+        whelk.storeAtomicUpdate(baseWork.getShortId(), true, false, true, "xl", null, (Document doc) -> {
+            // TODO MERGE HERE
+        });
+
+        // Cleanup no longer linked work records
+        for (Document instance : instances) {
+            Map workEntity = instance.getWorkEntity();
+            String workUri = (String) workEntity.get("@id");
+            String workId = whelk.getStorage().getSystemIdByIri(workUri);
+            if (workEntity.size() == 1
+                    && workEntity.containsKey("@id")
+                    && !workEntity.equals(linkEntity)
+                    && whelk.getStorage().getDependers(workId).isEmpty()) {
+                String orphanID = whelk.getStorage().getSystemIdByIri((String)workEntity.get("@id"));
+                whelk.remove(orphanID, "xl", null);
+            }
+        }
+
+        // We must now save the baseWork a second time, to add all of the sameAs identifiers.
+        // These could not be added the first time, because they still belonged to other records
+        // that were not yet deleted.
+        whelk.storeAtomicUpdate(baseWork.getShortId(), true, false, true, "xl", null, (Document doc) -> {
+            for (String uri : workAlternateUris)
+                baseWork.addThingIdentifier(uri);
+        });
+
+        return baseWorkUri;
+    }
+
+    /**
+     * Find the set of instances that should link to the merged work. This of course includes the
+     * passed instanceIDs, but also any other instances already sharing a work with one of those IDs.
+     */
+    private static List<Document> collectInstancesOfThisWork(List<String> instanceIDs, Whelk whelk) {
+        List<Document> instances = new ArrayList<>(instanceIDs.size());
+        for (String instanceID : instanceIDs) {
+            Document instance = whelk.getDocument(instanceID);
+            instances.add( instance );
+
+            // Are there other instances linking to the same work as 'instance' ? If so add them to the
+            // collection to (possibly) re-link as well.
+            Map workEntity = instance.getWorkEntity();
+            if (workEntity.size() == 1 && workEntity.containsKey("@id")) {
+                String workUri = (String) workEntity.get("@id");
+                String workId = whelk.getStorage().getSystemIdByIri(workUri);
+                for (String otherInstanceId : whelk.getStorage().getDependers(workId)) {
+                    Document otherInstance = whelk.getDocument(otherInstanceId);
+                    instances.add( otherInstance );
+                }
+            }
+        }
+        return instances;
+    }
+
+    /**
+     * Select (or create+save) a work record that should be used going forward for
+     * all of the passed instances.
+     */
+    private static Document selectBaseWork(List<Document> instances, Whelk whelk) {
+        // Find all the works
+        List<String> linkedWorkURIs = new ArrayList<>();
+        List<Map> embeddedWorks = new ArrayList<>();
+        for (Document instance : instances) {
+            Map workEntity = instance.getWorkEntity();
+            if (workEntity.size() == 1 && workEntity.containsKey("@id")) {
+                linkedWorkURIs.add( (String) workEntity.get("@id"));
+            } else {
+                embeddedWorks.add(workEntity);
+            }
+        }
+
+        // Pick a linked one if any such exist, otherwise break off an embedded one
+        String baseWorkUri = null;
+        if (!linkedWorkURIs.isEmpty()) {
+            baseWorkUri = linkedWorkURIs.get(0); // TODO: Be a little smarter about _which_ work we pick?
+        } else {
+            Document newWork = new Document(embeddedWorks.get(0)); // TODO: Be a little smarter about _which_ work we break off?
+            newWork.deepReplaceId(Document.getBASE_URI().toString() + IdGenerator.generate());
+            newWork.setControlNumber(newWork.getShortId());
+            whelk.createDocument(newWork, "xl", null, "auth", false);
+            baseWorkUri = newWork.getThingIdentifiers().get(0);
+        }
+
+        return whelk.getStorage().loadDocumentByMainId(baseWorkUri);
+    }
+}

From ba10ba71e9bf5a7689706054c086dd4918aa3eb8 Mon Sep 17 00:00:00 2001
From: Jannis Mohlin Tsiroyannis <jannis.mohlintsiroyannis@kb.se>
Date: Wed, 2 Nov 2022 13:24:05 +0100
Subject: [PATCH 2/9] Do removal of orphaned works and transfer of mainEntity
 IDs within a transaction

This is necessary because the fundamental data integrity rules do not allow two
records to hold the same URI at the same time. In other words:

1. We can't first write the URIs from the disappearing record to the remaining one,
   as that would mean both records holding the URIs at the same time (forbidden).

2. We can't delete the disappearing record first and then write the URIs to the
   remaining one, because that would mean holding the URIs only in volatile
   memory for a short while in between. If the process were to die or be cancelled
   during this window, the URIs would be permanently lost.

Therefore, the removal of the record and the transfer of its URIs need to happen
within one and the same transaction.
---
 .../whelk/importer/DatasetImporter.groovy     |  4 ----
 .../src/main/groovy/whelk/WorkMerging.java    | 20 ++-----------------
 .../component/PostgreSQLComponent.groovy      | 13 ++++++++++++
 3 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
index c9cfe318cc..c966ce8e3a 100644
--- a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
+++ b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
@@ -58,10 +58,6 @@ class DatasetImporter {
     Map<String, String> aliasMap = [:]
 
     DatasetImporter(Whelk whelk, String datasetUri, Map flags=[:], String datasetDescPath=null) {
-        System.err.println("WARNING: Do not ever cancel an ongoing dataset loading operation using CTRL+C (or equivalent).\n"+
-        "Doing so poses a danger because dataset loading may under some circumstances need to perform\n"+
-        "operations that are not atomic, and loss of data can occur if the process is interrupted during\n"+
-        "such an operation.")
         this.whelk = whelk
         this.datasetUri = datasetUri
         if (datasetDescPath != null) {
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging.java b/whelk-core/src/main/groovy/whelk/WorkMerging.java
index 3747160792..c8a95f1088 100644
--- a/whelk-core/src/main/groovy/whelk/WorkMerging.java
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging.java
@@ -19,12 +19,6 @@ public class WorkMerging {
      * outside. It also means that should the process be stopped halfway through,
      * results may look odd (but will still obey basic data integrity rules).
      *
-     * In the worst case scenario, if the process is interrupted just after the orphans
-     * have been deleted, but their sameAs-uris have not yet been moved to the merged
-     * work, those sameAs-uris will be lost. This risk cannot be avoided without compromising
-     * the URI integrity checks of the underlying code (two records are never allowed to
-     * have the same URI at the same time).
-     *
      * Returns the URI of the one remaining (or new) work that all of the instances
      * now link to.
      */
@@ -35,12 +29,10 @@ public static String mergeWorksOf(List<String> instanceIDs, Whelk whelk) {
         Document baseWork = selectBaseWork(instances, whelk);
         String baseWorkUri = baseWork.getThingIdentifiers().get(0);
 
-        // Relink the instances and collect all work aliases
+        // Relink the instances
         Map linkEntity = new HashMap();
         linkEntity.put("@id", baseWorkUri);
-        List<String> workAlternateUris = new ArrayList<>();
         for (Document instance : instances) {
-            workAlternateUris.addAll( instance.getThingIdentifiers() );
             if (!instance.getWorkEntity().equals(linkEntity)) { // If not already linked to the correct record
                 whelk.storeAtomicUpdate(instance.getShortId(), true, false, true, "xl", null, (Document doc) -> {
                     doc.setWorkEntity(linkEntity);
@@ -64,18 +56,10 @@ public static String mergeWorksOf(List<String> instanceIDs, Whelk whelk) {
                     && !workEntity.equals(linkEntity)
                     && whelk.getStorage().getDependers(workId).isEmpty()) {
                 String orphanID = whelk.getStorage().getSystemIdByIri((String)workEntity.get("@id"));
-                whelk.remove(orphanID, "xl", null);
+                whelk.getStorage().removeAndTransferMainEntityURIs(orphanID, baseWork.getShortId());
             }
         }
 
-        // We must now save the baseWork a second time, to add all of the sameAs identifiers.
-        // These could not be added the first time, because they still belonged to other records
-        // that were not yet deleted.
-        whelk.storeAtomicUpdate(baseWork.getShortId(), true, false, true, "xl", null, (Document doc) -> {
-            for (String uri : workAlternateUris)
-                baseWork.addThingIdentifier(uri);
-        });
-
         return baseWorkUri;
     }
 
diff --git a/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy b/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy
index 2b00823fc6..9c176d75cc 100644
--- a/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy
+++ b/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy
@@ -991,6 +991,19 @@ class PostgreSQLComponent {
         return doc
     }
 
+    public removeAndTransferMainEntityURIs(String removeID, String inheritsAliasesID) {
+        withDbConnection {
+            Connection connection = getMyConnection()
+            Document from = lockAndLoad(removeID, connection)
+            remove(from.getShortId(), "xl", null, false)
+            storeUpdate(inheritsAliasesID, true, false, true, "xl", null, { to ->
+                from.getThingIdentifiers().each {
+                    to.addThingIdentifier(it)
+                }
+            })
+        }
+    }
+
     private Document lockAndLoad(String id, Connection connection) throws DocumentNotFoundException {
         PreparedStatement statement = null
         ResultSet resultSet = null

From 7908be9151344097c02744184b5424d3a45b56d2 Mon Sep 17 00:00:00 2001
From: Jannis Mohlin Tsiroyannis <jannis.mohlintsiroyannis@kb.se>
Date: Wed, 2 Nov 2022 13:49:44 +0100
Subject: [PATCH 3/9] WIP merging works.

---
 .../src/main/groovy/whelk/WorkMerging.java    | 39 +++++++++++--------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging.java b/whelk-core/src/main/groovy/whelk/WorkMerging.java
index c8a95f1088..1d7686567d 100644
--- a/whelk-core/src/main/groovy/whelk/WorkMerging.java
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging.java
@@ -28,15 +28,17 @@ public static String mergeWorksOf(List<String> instanceIDs, Whelk whelk) {
 
         Document baseWork = selectBaseWork(instances, whelk);
         String baseWorkUri = baseWork.getThingIdentifiers().get(0);
+        Map correctLinkEntity = new HashMap();
+        correctLinkEntity.put("@id", baseWorkUri);
 
-        // Relink the instances
-        Map linkEntity = new HashMap();
-        linkEntity.put("@id", baseWorkUri);
+        // Collect all already existing external works (different from our target) before relinking
+        List<String> orphanIDs = new ArrayList<>();
         for (Document instance : instances) {
-            if (!instance.getWorkEntity().equals(linkEntity)) { // If not already linked to the correct record
-                whelk.storeAtomicUpdate(instance.getShortId(), true, false, true, "xl", null, (Document doc) -> {
-                    doc.setWorkEntity(linkEntity);
-                });
+            Map workEntity = instance.getWorkEntity();
+            if (workEntity.size() == 1 && !workEntity.equals(correctLinkEntity)) {
+                String workUri = (String) workEntity.get("@id");
+                String workId = whelk.getStorage().getSystemIdByIri(workUri);
+                orphanIDs.add(workId);
             }
         }
 
@@ -46,17 +48,22 @@ public static String mergeWorksOf(List<String> instanceIDs, Whelk whelk) {
             // TODO MERGE HERE
         });
 
-        // Cleanup no longer linked work records
+        // Relink the instances
         for (Document instance : instances) {
-            Map workEntity = instance.getWorkEntity();
-            String workUri = (String) workEntity.get("@id");
-            String workId = whelk.getStorage().getSystemIdByIri(workUri);
-            if (workEntity.size() == 1
-                    && workEntity.containsKey("@id")
-                    && !workEntity.equals(linkEntity)
-                    && whelk.getStorage().getDependers(workId).isEmpty()) {
-                String orphanID = whelk.getStorage().getSystemIdByIri((String)workEntity.get("@id"));
+            if (!instance.getWorkEntity().equals(correctLinkEntity)) { // If not already linked to the correct record
+                whelk.storeAtomicUpdate(instance.getShortId(), true, false, true, "xl", null, (Document doc) -> {
+                    doc.setWorkEntity(correctLinkEntity);
+                });
+            }
+        }
+
+        // Cleanup no longer linked work records
+        for (String orphanID : orphanIDs) {
+            try {
                 whelk.getStorage().removeAndTransferMainEntityURIs(orphanID, baseWork.getShortId());
+            } catch (RuntimeException e) {
+                // Expected possible cause of exception: A new link was added to this work, _after_ we collected
+                // and relinked the instances of it. In this (theoretical) case, just leave the old work in place.
             }
         }
 

From 74784f6dbca2edfb769d3df549a02ab4bd87ed8c Mon Sep 17 00:00:00 2001
From: Jannis Mohlin Tsiroyannis <jannis.mohlintsiroyannis@kb.se>
Date: Mon, 7 Nov 2022 12:44:42 +0100
Subject: [PATCH 4/9] WIP: work merging by dataset loader.

---
 .../whelk/importer/DatasetImporter.groovy     | 45 +++++++++---
 .../src/main/groovy/whelk/WorkMerging.java    | 72 ++++++++++++++-----
 2 files changed, 91 insertions(+), 26 deletions(-)

diff --git a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
index c966ce8e3a..9bd99cd915 100644
--- a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
+++ b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
@@ -2,6 +2,8 @@ package whelk.importer
 
 import groovy.util.logging.Log4j2 as Log
 import groovy.transform.CompileStatic
+import whelk.WorkMerging
+
 import static groovy.transform.TypeCheckingMode.SKIP
 
 import whelk.Document
@@ -113,15 +115,23 @@ class DatasetImporter {
             Document incomingDoc = completeRecord(data, recordType, true)
             idsInInput.add(incomingDoc.getShortId())
 
-            // This race condition should be benign. If there is a document with
-            // the same ID created in between the check and the creation, we'll
-            // get an exception and fail early (unfortunate but acceptable).
-            switch (createOrUpdateDocument(incomingDoc)) {
-                case WRITE_RESULT.CREATED:
-                    createdCount++;
-                    break;
-                case WRITE_RESULT.UPDATED:
-                    updatedCount++;
+            if (data.get("@type") != null &&
+                    whelk.getJsonld().isSubClassOf( incomingDoc.getThingType(), "Work" )) {
+
+                createOrUpdateWork(incomingDoc)
+
+            } else { // Not a work
+
+                // This race condition should be benign. If there is a document with
+                // the same ID created in between the check and the creation, we'll
+                // get an exception and fail early (unfortunate but acceptable).
+                switch (createOrUpdateDocument(incomingDoc)) {
+                    case WRITE_RESULT.CREATED:
+                        createdCount++;
+                        break;
+                    case WRITE_RESULT.UPDATED:
+                        updatedCount++;
+                }
             }
 
             if ( lineCount % 100 == 0 ) {
@@ -286,8 +296,8 @@ class DatasetImporter {
     }
 
     private WRITE_RESULT createOrUpdateDocument(Document incomingDoc) {
-        Document storedDoc = whelk.getDocument(incomingDoc.getShortId())
         WRITE_RESULT result
+        Document storedDoc = whelk.getDocument(incomingDoc.getShortId())
         if (storedDoc != null) {
             if (whelk.storeAtomicUpdate(incomingDoc.getShortId(), true, false, refreshDependers, "xl", null, { doc ->
                     doc.data = incomingDoc.data
@@ -300,9 +310,24 @@ class DatasetImporter {
             whelk.createDocument(incomingDoc, "xl", null, collection, false)
             result = WRITE_RESULT.CREATED
         }
+
         return result
     }
 
+    private void createOrUpdateWork(Document incomingWork) {
+        List bibIDs = []
+        List graphList = incomingWork.data.get("@graph")
+        Map mainEntity = graphList[1]
+        mainEntity.get("@reverse", [:]).get("instanceOf", []).each { bib ->
+            String instanceID = whelk.getStorage().getSystemIdByIri( (String) bib["@id"] )
+            if (instanceID != null)
+                bibIDs.add(instanceID)
+        }
+        if (!bibIDs.isEmpty()) {
+            WorkMerging.mergeWorksOf(bibIDs, [incomingWork], whelk)
+        }
+    }
+
     private long removeDeleted(Set<String> idsInInput, List<String> needsRetry) {
         // Clear out anything that was previously stored in this dataset, but was not in the in-data now.
         // If faced with "can't delete depended on stuff", retry again later, after more other deletes have
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging.java b/whelk-core/src/main/groovy/whelk/WorkMerging.java
index 1d7686567d..3435bc43b0 100644
--- a/whelk-core/src/main/groovy/whelk/WorkMerging.java
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging.java
@@ -2,17 +2,14 @@
 
 import whelk.component.PostgreSQLComponent;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 
 public class WorkMerging {
 
     /**
      * Merge the works of all listed instances into one. The listed instances
      * may or may not have external works already. Orphaned work records will be
-     * deleted.
+     * deleted. Extra (previously unsaved) works may optionally be supplied.
      *
      * This is _not_ one atomic operation, but rather a series of operations.
      * This means that it is possible to observe the process halfway though from the
@@ -22,11 +19,11 @@ public class WorkMerging {
      * Returns the URI of the one remaining (or new) work that all of the instances
      * now link to.
      */
-    public static String mergeWorksOf(List<String> instanceIDs, Whelk whelk) {
+    public static String mergeWorksOf(List<String> instanceIDs, List<Document> extraWorks, Whelk whelk) {
 
         List<Document> instances = collectInstancesOfThisWork(instanceIDs, whelk);
 
-        Document baseWork = selectBaseWork(instances, whelk);
+        Document baseWork = selectBaseWork(instances, extraWorks, whelk);
         String baseWorkUri = baseWork.getThingIdentifiers().get(0);
         Map correctLinkEntity = new HashMap();
         correctLinkEntity.put("@id", baseWorkUri);
@@ -42,11 +39,13 @@ public static String mergeWorksOf(List<String> instanceIDs, Whelk whelk) {
             }
         }
 
+        System.err.println("**** SELECTED BASE: " + baseWork.getThingIdentifiers().get(0));
+
         // Merge other works into the baseWork. This must be done first, before any orphans can be deleted,
         // or we risk loosing data if the process is interrupted.
-        whelk.storeAtomicUpdate(baseWork.getShortId(), true, false, true, "xl", null, (Document doc) -> {
+        /*whelk.storeAtomicUpdate(baseWork.getShortId(), true, false, true, "xl", null, (Document doc) -> {
             // TODO MERGE HERE
-        });
+        });*/
 
         // Relink the instances
         for (Document instance : instances) {
@@ -99,7 +98,7 @@ private static List<Document> collectInstancesOfThisWork(List<String> instanceID
      * Select (or create+save) a work record that should be used going forward for
      * all of the passed instances.
      */
-    private static Document selectBaseWork(List<Document> instances, Whelk whelk) {
+    private static Document selectBaseWork(List<Document> instances, List<Document> extraWorks, Whelk whelk) {
         // Find all the works
         List<String> linkedWorkURIs = new ArrayList<>();
         List<Map> embeddedWorks = new ArrayList<>();
@@ -112,16 +111,57 @@ private static Document selectBaseWork(List<Document> instances, Whelk whelk) {
             }
         }
 
-        // Pick a linked one if any such exist, otherwise break off an embedded one
+        // Order of priority:
+        // 1. Any pre existing linked work records
+        // 2. Any supplied extra works
+        // 3. Any embedded work from one of the instances
+
+        // Pick a linked one if any such exist (1)
         String baseWorkUri = null;
         if (!linkedWorkURIs.isEmpty()) {
             baseWorkUri = linkedWorkURIs.get(0); // TODO: Be a little smarter about _which_ work we pick?
-        } else {
-            Document newWork = new Document(embeddedWorks.get(0)); // TODO: Be a little smarter about _which_ work we break off?
-            newWork.deepReplaceId(Document.getBASE_URI().toString() + IdGenerator.generate());
-            newWork.setControlNumber(newWork.getShortId());
+        } else if(!extraWorks.isEmpty()) { // Any supplied extra work (2)
+            Document selectedWork = extraWorks.get(0);
+
+            String slug = IdGenerator.generate();
+            String recordId = Document.getBASE_URI().toString() + slug;
+            String mainEntityId = recordId + "#it";
+            Document._set(Document.getRecordIdPath(), recordId, selectedWork.data);
+            Document._set(Document.getThingIdPath(), mainEntityId, selectedWork.data);
+            Document._set(Document.getThingIdPath2(), mainEntityId, selectedWork.data);
+
+            ((Map)(((List)selectedWork.data.get("@graph")).get(1))).remove("@reverse"); // ugh
+            
+            whelk.createDocument(selectedWork, "xl", null, "auth", false);
+            baseWorkUri = selectedWork.getThingIdentifiers().get(0);
+        } else { // Otherwise break off an embedded one (3)
+            String slug = IdGenerator.generate();
+            String recordId = Document.getBASE_URI().toString() + slug;
+            String mainEntityId = recordId + "#it";
+
+            Map chosenEmbedded = embeddedWorks.get(0); // TODO: Be a little smarter about _which_ work we break off?
+
+            Map docMap = new HashMap();
+            List graph = new ArrayList();
+            Map record = new HashMap();
+            docMap.put("@graph", graph);
+
+            graph.add(record);
+            record.put("@id", Document.getBASE_URI().toString() + slug);
+            record.put("@type", "Record");
+            Map mainEntityLink = new HashMap();
+            mainEntityLink.put("@id", mainEntityId);
+            record.put("mainEntity", mainEntityLink);
+
+            graph.add(chosenEmbedded);
+            chosenEmbedded.put("@id", mainEntityId);
+
+            Document newWork = new Document(docMap);
+            newWork.setControlNumber(slug);
+            newWork.setGenerationDate(new Date());
+            //newWork.setGenerationProcess("https://id.kb.se/datasetimporter"); // TODO: KOLLA MED FORMAT!!
             whelk.createDocument(newWork, "xl", null, "auth", false);
-            baseWorkUri = newWork.getThingIdentifiers().get(0);
+            baseWorkUri = mainEntityId;
         }
 
         return whelk.getStorage().loadDocumentByMainId(baseWorkUri);

From b27d41425c2aa2d0553bc2b23211f8e25f2fe6ba Mon Sep 17 00:00:00 2001
From: Jannis Mohlin Tsiroyannis <jannis.mohlintsiroyannis@kb.se>
Date: Mon, 7 Nov 2022 13:19:25 +0100
Subject: [PATCH 5/9] WIP work merging. Can't use new IDs, or we lose
 syncability of datasets.

---
 whelk-core/src/main/groovy/whelk/WorkMerging.java | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging.java b/whelk-core/src/main/groovy/whelk/WorkMerging.java
index 3435bc43b0..c1f67eef3c 100644
--- a/whelk-core/src/main/groovy/whelk/WorkMerging.java
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging.java
@@ -123,15 +123,8 @@ private static Document selectBaseWork(List<Document> instances, List<Document>
         } else if(!extraWorks.isEmpty()) { // Any supplied extra work (2)
             Document selectedWork = extraWorks.get(0);
 
-            String slug = IdGenerator.generate();
-            String recordId = Document.getBASE_URI().toString() + slug;
-            String mainEntityId = recordId + "#it";
-            Document._set(Document.getRecordIdPath(), recordId, selectedWork.data);
-            Document._set(Document.getThingIdPath(), mainEntityId, selectedWork.data);
-            Document._set(Document.getThingIdPath2(), mainEntityId, selectedWork.data);
-
             ((Map)(((List)selectedWork.data.get("@graph")).get(1))).remove("@reverse"); // ugh
-            
+
             whelk.createDocument(selectedWork, "xl", null, "auth", false);
             baseWorkUri = selectedWork.getThingIdentifiers().get(0);
         } else { // Otherwise break off an embedded one (3)

From 0b01c79d0e744ccddad4d7b558f49dfd3ba59609 Mon Sep 17 00:00:00 2001
From: Jannis Mohlin Tsiroyannis <jannis.mohlintsiroyannis@kb.se>
Date: Mon, 7 Nov 2022 13:41:29 +0100
Subject: [PATCH 6/9] Track updates/creates for work records too.

---
 .../whelk/importer/DatasetImporter.groovy     |  9 ++++-
 .../src/main/groovy/whelk/WorkMerging.java    | 34 ++++++++++++-------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
index 9bd99cd915..078a6f6985 100644
--- a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
+++ b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
@@ -99,6 +99,7 @@ class DatasetImporter {
 
         long updatedCount = 0
         long createdCount = 0
+
         long lineCount = 1 // The data sets self describing first record also counts.
 
         boolean first = true
@@ -118,7 +119,13 @@ class DatasetImporter {
             if (data.get("@type") != null &&
                     whelk.getJsonld().isSubClassOf( incomingDoc.getThingType(), "Work" )) {
 
-                createOrUpdateWork(incomingDoc)
+                switch (createOrUpdateWork(incomingDoc)) {
+                    case WorkMerging.WRITE_RESULT.CREATED:
+                        createdCount++;
+                        break;
+                    case WorkMerging.WRITE_RESULT.UPDATED:
+                        updatedCount++;
+                }
 
             } else { // Not a work
 
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging.java b/whelk-core/src/main/groovy/whelk/WorkMerging.java
index c1f67eef3c..6d9fb2e6d0 100644
--- a/whelk-core/src/main/groovy/whelk/WorkMerging.java
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging.java
@@ -1,11 +1,20 @@
 package whelk;
 
-import whelk.component.PostgreSQLComponent;
-
 import java.util.*;
 
 public class WorkMerging {
 
+    public enum WRITE_RESULT {
+        ALREADY_UP_TO_DATE,
+        UPDATED,
+        CREATED
+    }
+
+    // No proper pointers or multiple return values in Java :(
+    private static class WriteResultReference {
+        public WRITE_RESULT result = WRITE_RESULT.ALREADY_UP_TO_DATE;
+    }
+
     /**
      * Merge the works of all listed instances into one. The listed instances
      * may or may not have external works already. Orphaned work records will be
@@ -15,15 +24,14 @@ public class WorkMerging {
      * This means that it is possible to observe the process halfway though from the
      * outside. It also means that should the process be stopped halfway through,
      * results may look odd (but will still obey basic data integrity rules).
-     *
-     * Returns the URI of the one remaining (or new) work that all of the instances
-     * now link to.
      */
-    public static String mergeWorksOf(List<String> instanceIDs, List<Document> extraWorks, Whelk whelk) {
+    public static WRITE_RESULT mergeWorksOf(List<String> instanceIDs, List<Document> extraWorks, Whelk whelk) {
+
+        WriteResultReference result = new WriteResultReference();
 
         List<Document> instances = collectInstancesOfThisWork(instanceIDs, whelk);
 
-        Document baseWork = selectBaseWork(instances, extraWorks, whelk);
+        Document baseWork = selectBaseWork(instances, extraWorks, result, whelk);
         String baseWorkUri = baseWork.getThingIdentifiers().get(0);
         Map correctLinkEntity = new HashMap();
         correctLinkEntity.put("@id", baseWorkUri);
@@ -39,12 +47,10 @@ public static String mergeWorksOf(List<String> instanceIDs, List<Document> extra
             }
         }
 
-        System.err.println("**** SELECTED BASE: " + baseWork.getThingIdentifiers().get(0));
-
         // Merge other works into the baseWork. This must be done first, before any orphans can be deleted,
         // or we risk loosing data if the process is interrupted.
         /*whelk.storeAtomicUpdate(baseWork.getShortId(), true, false, true, "xl", null, (Document doc) -> {
-            // TODO MERGE HERE
+            // TODO MERGE HERE AND DONT FORGET TO SET result.result IF ANYTHING CHANGES!
         });*/
 
         // Relink the instances
@@ -66,7 +72,7 @@ public static String mergeWorksOf(List<String> instanceIDs, List<Document> extra
             }
         }
 
-        return baseWorkUri;
+        return result.result;
     }
 
     /**
@@ -98,7 +104,7 @@ private static List<Document> collectInstancesOfThisWork(List<String> instanceID
      * Select (or create+save) a work record that should be used going forward for
      * all of the passed instances.
      */
-    private static Document selectBaseWork(List<Document> instances, List<Document> extraWorks, Whelk whelk) {
+    private static Document selectBaseWork(List<Document> instances, List<Document> extraWorks, WriteResultReference result, Whelk whelk) {
         // Find all the works
         List<String> linkedWorkURIs = new ArrayList<>();
         List<Map> embeddedWorks = new ArrayList<>();
@@ -126,6 +132,7 @@ private static Document selectBaseWork(List<Document> instances, List<Document>
             ((Map)(((List)selectedWork.data.get("@graph")).get(1))).remove("@reverse"); // ugh
 
             whelk.createDocument(selectedWork, "xl", null, "auth", false);
+            result.result = WRITE_RESULT.CREATED;
             baseWorkUri = selectedWork.getThingIdentifiers().get(0);
         } else { // Otherwise break off an embedded one (3)
             String slug = IdGenerator.generate();
@@ -152,8 +159,9 @@ private static Document selectBaseWork(List<Document> instances, List<Document>
             Document newWork = new Document(docMap);
             newWork.setControlNumber(slug);
             newWork.setGenerationDate(new Date());
-            //newWork.setGenerationProcess("https://id.kb.se/datasetimporter"); // TODO: KOLLA MED FORMAT!!
+            //newWork.setGenerationProcess("https://id.kb.se/workmerger"); // TODO: KOLLA MED FORMAT!!
             whelk.createDocument(newWork, "xl", null, "auth", false);
+            result.result = WRITE_RESULT.CREATED;
             baseWorkUri = mainEntityId;
         }
 

From 61da6d6e81fba6edcc3b6d5bde5f4147bac5ed66 Mon Sep 17 00:00:00 2001
From: Jannis Mohlin Tsiroyannis <jannis.mohlintsiroyannis@kb.se>
Date: Mon, 7 Nov 2022 13:51:27 +0100
Subject: [PATCH 7/9] Cleanup

---
 importers/src/main/groovy/whelk/importer/DatasetImporter.groovy | 1 -
 1 file changed, 1 deletion(-)

diff --git a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
index 078a6f6985..3fddb229c8 100644
--- a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
+++ b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
@@ -99,7 +99,6 @@ class DatasetImporter {
 
         long updatedCount = 0
         long createdCount = 0
-
         long lineCount = 1 // The data sets self describing first record also counts.
 
         boolean first = true

From 8758d0fad8843d28ae70ff7676a5a0cc6d47f91a Mon Sep 17 00:00:00 2001
From: Jannis Mohlin Tsiroyannis <jannis.mohlintsiroyannis@kb.se>
Date: Tue, 8 Nov 2022 14:53:27 +0100
Subject: [PATCH 8/9] Move already existing work merging code in from an
 experimental branch

The code in question was taken as is from commit 3d2882cce and moved into
whelk-core.
---
 .../whelk/importer/DatasetImporter.groovy     |   3 +-
 .../whelk/WorkMerging/DisjointSets.java       | 220 +++++
 .../main/groovy/whelk/WorkMerging/Doc.groovy  | 363 ++++++++
 .../whelk/WorkMerging/DocumentComparator.java | 231 +++++
 .../whelk/WorkMerging/FieldStatus.groovy      |   7 +
 .../main/groovy/whelk/WorkMerging/Html.groovy | 111 +++
 .../main/groovy/whelk/WorkMerging/Util.groovy | 306 +++++++
 .../whelk/WorkMerging/WorkComparator.groovy   | 136 +++
 .../whelk/{ => WorkMerging}/WorkMerging.java  |   6 +-
 .../whelk/WorkMerging/WorkToolJob.groovy      | 797 ++++++++++++++++++
 .../WorkMerging/compare/Classification.groovy |  63 ++
 .../whelk/WorkMerging/compare/Default.groovy  |  13 +
 .../whelk/WorkMerging/compare/Extent.groovy   |  15 +
 .../WorkMerging/compare/FieldHandler.groovy   |  12 +
 .../WorkMerging/compare/GenreForm.groovy      |  36 +
 .../WorkMerging/compare/SameOrEmpty.groovy    |  21 +
 .../whelk/WorkMerging/compare/StuffSet.groovy |  38 +
 .../whelk/WorkMerging/compare/Subject.groovy  |   8 +
 .../WorkMerging/compare/TranslationOf.groovy  |  22 +
 .../WorkMerging/compare/WorkTitle.groovy      |  23 +
 20 files changed, 2428 insertions(+), 3 deletions(-)
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/DisjointSets.java
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/Doc.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/DocumentComparator.java
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/FieldStatus.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/Html.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/Util.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/WorkComparator.groovy
 rename whelk-core/src/main/groovy/whelk/{ => WorkMerging}/WorkMerging.java (98%)
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/WorkToolJob.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/Classification.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/Default.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/Extent.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/FieldHandler.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/GenreForm.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/SameOrEmpty.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/StuffSet.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/Subject.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/TranslationOf.groovy
 create mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/WorkTitle.groovy

diff --git a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
index 3fddb229c8..394037cf73 100644
--- a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
+++ b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
@@ -2,7 +2,7 @@ package whelk.importer
 
 import groovy.util.logging.Log4j2 as Log
 import groovy.transform.CompileStatic
-import whelk.WorkMerging
+import whelk.WorkMerging.WorkMerging
 
 import static groovy.transform.TypeCheckingMode.SKIP
 
@@ -11,7 +11,6 @@ import whelk.JsonLd
 import whelk.TargetVocabMapper
 import whelk.Whelk
 import whelk.converter.TrigToJsonLdParser
-import whelk.exception.CancelUpdateException
 import whelk.util.DocumentUtil
 import static whelk.util.LegacyIntegrationTools.NO_MARC_COLLECTION
 
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/DisjointSets.java b/whelk-core/src/main/groovy/whelk/WorkMerging/DisjointSets.java
new file mode 100644
index 0000000000..7cd3e33a6c
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/DisjointSets.java
@@ -0,0 +1,220 @@
+package whelk.WorkMerging;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * This class keeps track of a set of disjoint (non-overlapping) sets.
+ */
+public class DisjointSets<T> {
+    /**
+     * Sets as forest of rooted trees.
+     * Pointer to parent in tree, root points to itself.
+     */
+    List<Integer> forest;
+
+    /**
+     * Rank of each tree node (keeps trees balanced when merging).
+     */
+    List<Integer> ranks;
+
+    /**
+     * Sets as circular linked lists (so that we can find all elements in a set).
+     * Pointer to the next element in the set.
+     */
+    List<Integer> sets;
+
+    /**
+     * Map from set element value to index
+     */
+    Map<T, Integer> ixs;
+
+    /**
+     * Map from set element index to value
+     */
+    List<T> ixToValue;
+
+    public DisjointSets(int initialCapacity) {
+        forest = new ArrayList<>(initialCapacity);
+        ranks = new ArrayList<>(initialCapacity);
+        sets = new ArrayList<>(initialCapacity);
+        ixs = new HashMap<>(initialCapacity);
+        ixToValue = new ArrayList<>(initialCapacity);
+    }
+
+    public DisjointSets() {
+        this(20);
+    }
+
+    /**
+     * Create a new set if it doesn't already exist.
+     *
+     * @param e initial element in set
+     */
+    public void createSet(T e) {
+        if (ixs.containsKey(e)) {
+            return;
+        }
+
+        int ix = forest.size();
+        ixs.put(e, ix);
+        forest.add(ix);
+        ranks.add(0);
+        sets.add(ix);
+        ixToValue.add(e);
+
+        if (ix == Integer.MAX_VALUE) {
+            throw new IllegalStateException("size > Integer.MAX_VALUE");
+        }
+    }
+
+    /**
+     * Add a set, merging it with existing intersecting sets
+     *
+     * @param set a set to be added
+     */
+    public void addSet(Iterable<T> set) {
+        Iterator<T> i = set.iterator();
+        if (!i.hasNext()) {
+            return;
+        }
+
+        T first = i.next();
+        while (i.hasNext()) {
+            mergeSets(first, i.next());
+        }
+    }
+
+    /**
+     * Merge two sets identified by elements.
+     * Sets will be created if they don't exist
+     *
+     * @param a an element of the first set
+     * @param b an element of the second set
+     */
+    public void mergeSets(T a, T b) {
+        if (!ixs.containsKey(a)) {
+            createSet(a);
+        }
+        if (!ixs.containsKey(b)) {
+            createSet(b);
+        }
+
+        int ixA = ixs.get(a);
+        int ixB = ixs.get(b);
+
+        int rootA = root(ixA);
+        int rootB = root(ixB);
+
+        if (rootA == rootB) {
+            return;
+        }
+
+        int rankA = ranks.get(rootA);
+        int rankB = ranks.get(rootB);
+
+        if (rankA > rankB) {
+            forest.set(rootB, rootA);
+        } else {
+            forest.set(rootA, rootB);
+            if (rankA == rankB) {
+                ranks.set(rootB, rankB + 1);
+            }
+        }
+
+        int link = sets.get(rootA);
+        sets.set(rootA, sets.get(rootB));
+        sets.set(rootB, link);
+    }
+
+    /**
+     * Lookup a set based on an element in the set
+     *
+     * @param e an element in the set
+     * @return the set
+     */
+    public Set<T> getSet(T e) {
+        if (!ixs.containsKey(e)) {
+            throw new IllegalArgumentException("No set with element: " + e);
+        }
+
+        Set<T> result = new HashSet<>();
+        int start = sets.get(ixs.get(e));
+        int node = start;
+        do {
+            result.add(ixToValue.get(node));
+            node = sets.get(node);
+        } while (node != start);
+
+        return result;
+    }
+
+    /**
+     * Iterate over all sets
+     *
+     * @param visitor
+     */
+    public void iterateAllSets(SetVisitor<T> visitor) {
+        boolean[] visited = new boolean[sets.size()];
+
+        for (int ix : sets) {
+            if (visited[ix]) {
+                continue;
+            }
+
+            int start = sets.get(ix);
+            int node = start;
+            do {
+                visited[node] = true;
+                visitor.nextElement(ixToValue.get(node));
+                node = sets.get(node);
+            } while (node != start);
+
+            visitor.closeSet();
+        }
+    }
+
+    /**
+     * @return a set with all sets
+     */
+    public Set<Set<T>> allSets() {
+        final Set<Set<T>> result = new HashSet<>();
+
+        iterateAllSets(new SetVisitor<T>() {
+            Set<T> current = new HashSet<>();
+
+            public void closeSet() {
+                result.add(current);
+                current = new HashSet<>();
+            }
+
+            public void nextElement(T e) {
+                current.add(e);
+            }
+        });
+
+        return result;
+    }
+
+    private int root(int node) {
+        while (node != forest.get(node)) {
+            int parent = forest.get(node);
+            //path splitting - point node to grandparent
+            forest.set(node, forest.get(parent));
+            node = parent;
+        }
+
+        return node;
+    }
+
+    public interface SetVisitor<T> {
+        void nextElement(T e);
+
+        void closeSet();
+    }
+}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/Doc.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/Doc.groovy
new file mode 100644
index 0000000000..c50a7abf85
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/Doc.groovy
@@ -0,0 +1,363 @@
+package whelk.WorkMerging
+
+import se.kb.libris.Normalizers
+import whelk.Document
+import whelk.JsonLd
+import whelk.Whelk
+
+import static whelk.WorkMerging.Util.asList
+
+class Doc {
+    public static final String SAOGF_SKÖN = 'https://id.kb.se/term/saogf/Sk%C3%B6nlitteratur'
+    public static final List MARC_FICTION = [
+            'https://id.kb.se/marc/FictionNotFurtherSpecified',
+            'https://id.kb.se/marc/Drama',
+            'https://id.kb.se/marc/Essay',
+            'https://id.kb.se/marc/Novel',
+            'https://id.kb.se/marc/HumorSatiresEtc',
+            'https://id.kb.se/marc/Letter',
+            'https://id.kb.se/marc/ShortStory',
+            'https://id.kb.se/marc/MixedForms',
+            'https://id.kb.se/marc/Poetry',
+    ]
+    public static final List MARC_NOT_FICTION = [
+            'https://id.kb.se/marc/NotFictionNotFurtherSpecified',
+            'https://id.kb.se/marc/Biography'
+    ]
+    public static final List DRAMA_GF = [
+            'https://id.kb.se/term/saogf/Dramatik',
+            'https://id.kb.se/marc/Drama'
+    ]
+
+    Whelk whelk
+    Document doc
+    Map work
+    Map framed
+    List<String> titles
+
+    //FIXME
+    Document ogDoc
+
+    Doc(Whelk whelk, Document doc) {
+        this.whelk = whelk
+        this.doc = doc
+        this.ogDoc = doc.clone()
+    }
+
+    Map getWork() {
+        if (!work) {
+            work = getWork(whelk, doc)
+        }
+
+        return work
+    }
+
+    static Map getWork(Whelk whelk, Document d) {
+        Map work = Normalizers.getWork(whelk.jsonld, d)
+        if (!work) {
+            throw new NoWorkException(d.shortId)
+        }
+        work = new HashMap<>(work)
+
+        //TODO 'marc:fieldref'
+
+        work.remove('@id')
+        return work
+    }
+
+    Map workCopy() {
+        return getWork(whelk, doc.clone())
+    }
+
+    Map getMainEntity() {
+        return doc.data['@graph'][1]
+    }
+
+    boolean isInstance() {
+        return getMainEntity().containsKey('instanceOf')
+    }
+
+    List<String> getTitleVariants() {
+        if (!titles) {
+            titles = Util.getTitleVariants(getMainEntity()['hasTitle'])
+        }
+
+        return titles
+    }
+
+    boolean hasGenericTitle() {
+        Util.hasGenericTitle(getMainEntity()['hasTitle'])
+    }
+
+    private static String displayTitle(Map thing) {
+        thing['hasTitle'].collect { it['@type'] + ": " + it['flatTitle'] }.join(', ')
+    }
+
+    String mainEntityDisplayTitle() {
+        displayTitle(['hasTitle': Util.flatTitles(getMainEntity()['hasTitle'])])
+    }
+
+    String link() {
+        String base = Document.getBASE_URI().toString()
+        String kat = "katalogisering/"
+        String id = doc.shortId
+        return base + kat + id
+    }
+
+    boolean isMonograph() {
+        getMainEntity()['issuanceType'] == 'Monograph'
+    }
+
+    boolean hasPart() {
+        getWork()['hasPart'] != null
+    }
+
+    String encodingLevel() {
+        return doc.data['@graph'][0]['encodingLevel'] ?: ''
+    }
+
+    int numPages() {
+        String extent = Util.getPathSafe(getMainEntity(), ['extent', 0, 'label', 0]) ?: Util.getPathSafe(getMainEntity(), ['extent', 0, 'label'], '')
+        return numPages(extent)
+    }
+
+    // TODO: improve parsing https://metadatabyran.kb.se/beskrivning/materialtyper-arbetsfloden/tryckta-monografier/omfang-for-tryckta-monografier
+    static int numPages(String extentLabel) {
+        def l = extentLabel.replace('onumrerade', '')
+        def matcher = l =~ /(\d+)(?=[, \[\]0-9]*[sp])/
+        List<Integer> pages = []
+        while (matcher.find()) {
+            pages << Integer.parseInt(matcher.group(1))
+        }
+        pages ? pages.max() : -1
+    }
+
+    // TODO...
+    String getDisplayText(String field) {
+        if (field == 'contribution') {
+            return contributorStrings().join("<br>")
+        } else if (field == 'classification') {
+            return classificationStrings().join("<br>")
+        } else if (field == 'instance title') {
+            return isInstance() ? (getMainEntity()['hasTitle'] ?: '') : ''
+        } else if (field == 'work title') {
+            // To load hasTitle from linked work in instanceOf we can use getFramed()
+            // However we then need to handle that getFramed() loads linked instances in hasTitle.source
+            // Prefer getMainEntity() for now
+            return isInstance() ? (getMainEntity()['instanceOf']['hasTitle'] ?: '') : (getMainEntity()['hasTitle'] ?: '')
+        } else if (field == 'instance type') {
+            return isInstance() ? getMainEntity()['@type'] : ''
+        } else if (field == 'editionStatement') {
+            return getMainEntity()['editionStatement'] ?: ''
+        } else if (field == 'responsibilityStatement') {
+            return getMainEntity()['responsibilityStatement'] ?: ''
+        } else if (field == 'encodingLevel') {
+            return encodingLevel()
+        } else if (field == 'publication') {
+            return chipString(getMainEntity()['publication'] ?: [])
+        } else if (field == 'identifiedBy') {
+            return chipString(getMainEntity()['identifiedBy'] ?: [])
+        } else if (field == 'extent') {
+            return chipString(getMainEntity()['extent'] ?: [])
+        } else if (field == 'reproductionOf') {
+            return reproductionOfLink()
+        } else {
+            return chipString(getWork().getOrDefault(field, []))
+        }
+    }
+
+    protected String chipString(def thing) {
+        Util.chipString(thing, whelk)
+    }
+
+    String tooltip(String string, String tooltip) {
+        """<abbr title="${tooltip}">${string}</abbr>"""
+    }
+
+    private String reproductionOfLink() {
+        def shortId = Util.getPathSafe(getMainEntity(), ['reproductionOf', '@id'])
+                ?.tokenize("/#")
+                ?.dropRight(1)
+                ?.last() ?: ''
+
+        return "<a href=\"#$shortId\">$shortId</a>"
+    }
+
+    private List classificationStrings() {
+        List path = isInstance() ? ['instanceOf', 'classification'] : ['classification']
+        List<Map> classification = Util.getPathSafe(getFramed(), path, [])
+        classification.collect() { c ->
+            StringBuilder s = new StringBuilder()
+            s.append(flatMaybeLinked(c['inScheme'], ['code', 'version']).with { it.isEmpty() ? it : it + ': ' })
+            s.append(flatMaybeLinked(c, ['code']))
+            return s.toString()
+        }
+    }
+
+    private List contributorStrings() {
+        List path = isInstance() ? ['instanceOf', 'contribution'] : ['contribution']
+        List contribution = Util.getPathSafe(getFramed(), path, [])
+
+        return contribution.collect { Map c ->
+            contributionStr(c)
+        }
+    }
+
+    protected Map getFramed() {
+        if (!framed) {
+            if (isInstance()) {
+                framed = JsonLd.frame(doc.getThingIdentifiers().first(), whelk.loadEmbellished(doc.shortId).data)
+            } else {
+                Document copy = doc.clone()
+                whelk.embellish(copy)
+                framed = JsonLd.frame(doc.getThingIdentifiers().first(), copy.data)
+            }
+        }
+
+        return framed
+    }
+
+    private String contributionStr(Map contribution) {
+        StringBuilder s = new StringBuilder()
+
+        if (contribution['@type'] == 'PrimaryContribution') {
+            s.append('<b>')
+        }
+
+        s.append(flatMaybeLinked(contribution['role'], ['code', 'label']).with { it.isEmpty() ? it : it + ': ' })
+        s.append(flatMaybeLinked(contribution['agent'], ['givenName', 'familyName', 'lifeSpan', 'name']))
+
+        if (contribution['@type'] == 'PrimaryContribution') {
+            s.append('</b>')
+        }
+
+        return s.toString()
+    }
+
+    static String flatten(Object o, List order, String mapSeparator = ': ') {
+        if (o instanceof String) {
+            return o
+        }
+        if (o instanceof List) {
+            return o
+                    .collect { flatten(it, order) }
+                    .join(' || ')
+        }
+        if (o instanceof Map) {
+            return order
+                    .findResults { ((Map) o).get(it) }
+                    .collect { flatten(it, order) }
+                    .join(mapSeparator)
+        }
+
+        throw new RuntimeException(String.format("unexpected type: %s for %s", o.class.getName(), o))
+    }
+
+    private String flatMaybeLinked(Object thing, List order) {
+        if (!thing)
+            return ''
+
+        if (thing instanceof List) {
+            return thing.collect { flatMaybeLinked(it, order) }.join(' | ')
+        }
+        String s = flatten(thing, order, ', ')
+
+        thing['@id']
+                ? """<a href="${thing['@id']}">$s</a>"""
+                : s
+    }
+
+    boolean isFiction() {
+        isMarcFiction() || isSaogfFiction() || isSabFiction()
+    }
+
+    boolean isMarcFiction() {
+        (getWork()['genreForm'] ?: []).any { it['@id'] in MARC_FICTION }
+    }
+
+    boolean isMarcNotFiction() {
+        (getWork()['genreForm'] ?: []).any { it['@id'] in MARC_NOT_FICTION }
+    }
+
+    boolean isSaogfFiction() {
+        (getWork()['genreForm'] ?: []).any { whelk.relations.isImpliedBy(SAOGF_SKÖN, it['@id'] ?: '') }
+    }
+
+    boolean isSabFiction() {
+        classificationStrings().any { it.contains('kssb') && it.contains(': H') }
+    }
+
+    boolean isNotFiction() {
+        // A lot of fiction has marc/NotFictionNotFurtherSpecified but then classification is usually empty
+        isMarcNotFiction() && (!classificationStrings().isEmpty() && !isSabFiction())
+    }
+
+    boolean isText() {
+        getWork()['@type'] == 'Text'
+    }
+
+    boolean isTranslationWithoutTranslator() {
+        isTranslation() && !hasTranslator()
+    }
+
+    boolean isTranslation() {
+        getWork()['translationOf']
+    }
+
+    boolean isSabDrama() {
+        classificationStrings().any { it.contains(': Hc.02') || it.contains(': Hce.02') }
+    }
+
+    boolean isGfDrama() {
+        asList(getWork()['genreForm']).any { it['@id'] in DRAMA_GF }
+    }
+
+    boolean isDrama() {
+        isSabDrama() || isGfDrama()
+    }
+
+    boolean hasRole(String relatorIri) {
+        asList(getWork()['contribution']).any {
+            asList(it['role']).contains(['@id': relatorIri])
+        }
+    }
+
+    boolean hasTranslator() {
+        hasRole('https://id.kb.se/relator/translator')
+    }
+
+    boolean hasDistinguishingEdition() {
+        (getMainEntity()['editionStatement'] ?: '').toString().toLowerCase().contains("förk")
+    }
+
+    boolean hasRelationshipWithContribution() {
+        asList(getWork()['relationship']).any { r ->
+            asList(r['entity']).any { e ->
+                e.containsKey('contribution')
+            }
+        }
+    }
+
+    void addComparisonProps() {
+        if (hasDistinguishingEdition()) {
+            addToWork('editionStatement')
+        }
+        getWork()['_numPages'] = numPages()
+    }
+
+    void moveSummaryToInstance() {
+        if (getWork()['summary']) {
+            getMainEntity()['summary'] = asList(getMainEntity()['summary']) + asList(getWork()['summary'])
+            getWork().remove('summary')
+        }
+    }
+
+    void addToWork(String field) {
+        getWork()[field] = getMainEntity()[field]
+    }
+
+    void removeComparisonProps() {
+        getWork().remove('editionStatement')
+        getWork().remove('_numPages')
+    }
+}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/DocumentComparator.java b/whelk-core/src/main/groovy/whelk/WorkMerging/DocumentComparator.java
new file mode 100644
index 0000000000..2adb902b0b
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/DocumentComparator.java
@@ -0,0 +1,231 @@
+package whelk.WorkMerging;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+import java.util.function.Function;
+
+public class DocumentComparator {
+    private static final Comparator<Object> BY_HASH = (o1, o2) -> Integer.compare(o2.hashCode(), o1.hashCode()); // descending by hash; Integer.compare avoids subtraction overflow
+
+    private final Function<Object, Boolean> isOrderedList;
+
+    public DocumentComparator() {
+        this(o -> "termComponentList".equals(o));
+    }
+
+    public DocumentComparator(Function<Object, Boolean> isOrderedList) {
+        if (isOrderedList == null)
+            throw new NullPointerException();
+        this.isOrderedList = isOrderedList;
+    }
+
+    public boolean isEqual(Map<?, ?> a, Map<?, ?> b) {
+        if (a == null || b == null || a.size() != b.size()) {
+            return false;
+        }
+        for (Object key : a.keySet()) {
+            if (!isEqual(a.get(key), b.get(key), key)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private boolean isEqual(Object a, Object b, Object key) {
+        if (a == null || b == null) {
+            return false;
+        }
+        else if (a.getClass() != b.getClass()) {
+            return (isSingleItemList(a) && isEqual(((List<?>) a).get(0), b, key)
+                    || (isSingleItemList(b) && isEqual(a, ((List<?>) b).get(0), key)));
+        }
+        else if (a instanceof Map) {
+            return isEqual((Map<?, ?>) a, (Map<?, ?>) b);
+        }
+        else if (a instanceof List) {
+            if (isOrderedList.apply(key)) {
+                return isEqualOrdered((List<?>) a, (List<?>) b);
+            } else {
+                return isEqualUnordered((List<?>) a, (List<?>) b);
+            }
+        }
+        else {
+            return a.equals(b);
+        }
+    }
+
+    private boolean isSingleItemList(Object o) {
+        return o instanceof List && ((List<?>) o).size() == 1;
+    }
+
+    private boolean isEqualOrdered(List<?> a, List<?> b) {
+        if (a.size() != b.size()) {
+            return false;
+        }
+        for (int i = 0; i < a.size(); i++) {
+            if (!isEqual(a.get(i), b.get(i), null)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private boolean isEqualUnordered(List<?> a, List<?> b) {
+        if (a.size() != b.size()) {
+            return false;
+        }
+
+        a.sort(BY_HASH);
+        b.sort(BY_HASH);
+        
+        List<Integer> taken = new ArrayList<>(a.size());
+        nextA: for (int i = 0 ; i < a.size() ; i++) {
+            for (int j = 0 ; j < b.size() ; j++) {
+                if (!taken.contains(j) && isEqual(a.get(i), b.get(j), null)) {
+                    taken.add(j);
+                    continue nextA;
+                }
+            }
+            return false;
+        }
+
+        return true;
+    }
+
+    public boolean isSubset(Map<?, ?> a, Map<?, ?> b) {
+        if (a == null || b == null || a.size() > b.size()) {
+            return false;
+        }
+        for (Object key : a.keySet()) {
+            if (!isSubset(a.get(key), b.get(key), key)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private boolean isSubset(Object a, Object b, Object key) {
+        if (a == null || b == null || a.getClass() != b.getClass()) {
+            return false;
+        }
+        else if (a instanceof Map) {
+            return isSubset((Map<?, ?>) a, (Map<?, ?>) b);
+        }
+        else if (a instanceof List) {
+            if (isOrderedList.apply(key)) {
+                return isOrderedSubset((List<?>) a, (List<?>) b);
+            } else {
+                return isUnorderedSubset((List<?>) a, (List<?>) b);
+            }
+        }
+        else {
+            return a.equals(b);
+        }
+    }
+
+    private boolean isOrderedSubset(List<?> a, List<?> b) {
+        if (a.size() > b.size()) {
+            return false;
+        }
+        int ixB = 0;
+        for (int ixA = 0; ixA < a.size(); ixA++) {
+            if (ixB == b.size()) {
+                return false;
+            }
+
+            while (!isSubset(a.get(ixA), b.get(ixB++), null)) {
+                if (ixB == b.size()) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    private boolean isUnorderedSubset(List<?> a, List<?> b) { // an empty 'a' is a subset of any list; an 'a' larger than 'b' can never be matched one-to-one
+        return a.isEmpty() || (a.size() <= b.size() && new UnorderedListComparator(a, b).isSubset());
+    }
+
+    private class UnorderedListComparator {
+        List a;
+        List b;
+
+        Stack<Integer> stack;
+        Stack<Integer> matched;
+        boolean anyMatch;
+        Boolean[][] cache;
+
+        UnorderedListComparator(List<?> a, List<?> b) {
+            this.a = a;
+            this.b = b;
+            cache = new Boolean[a.size()][b.size()];
+        }
+
+        boolean isSubset() {
+            // since elements in 'a' might be subsets of more than one element
+            // in 'b' we must try different ways of matching elements
+            stack = new Stack<>();
+            matched = new Stack<>();
+
+            nextA();
+            while (stack.size() > 0) {
+                boolean match = isSubset(ixA(), ixB());
+                nextB();
+                if (match) {
+                    anyMatch = true;
+                    if (!matched.contains(ixB())) {
+                        matched.push(ixB());
+                        if (matched.size() == a.size()) {
+                            return true;
+                        }
+                        nextA();
+                    }
+                }
+
+                while (ixB() == b.size()) {
+                    if (!anyMatch) {
+                        return false;
+                    }
+                    previousA();
+                }
+            }
+
+            return false;
+        }
+
+        private boolean isSubset(int ixA, int ixB) {
+            if (cache[ixA][ixB] == null) {
+                cache[ixA][ixB] = DocumentComparator.this.isSubset(a.get(ixA), b.get(ixB), null);
+            }
+
+            return cache[ixA][ixB];
+        }
+
+        private void previousA() {
+            stack.pop();
+            if (matched.size() > 0) {
+                matched.pop();
+            }
+        }
+
+        private void nextA() {
+            stack.push(0);
+            anyMatch = false;
+        }
+
+        private void nextB() {
+            stack.push(stack.pop() + 1);
+        }
+
+        private int ixA() {
+            return stack.size() - 1;
+        }
+
+        private int ixB() {
+            return stack.size() > 0 ? stack.peek() : -1;
+        }
+    }
+}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/FieldStatus.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/FieldStatus.groovy
new file mode 100644
index 0000000000..a33445d1b4
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/FieldStatus.groovy
@@ -0,0 +1,7 @@
+package whelk.WorkMerging
+
+enum FieldStatus { // outcome of comparing one work field between documents (see WorkComparator.compare)
+    EQUAL, // field absent in both works, or the values compare as exactly equal
+    COMPATIBLE, // not exactly equal, but the field's handler reports the values as compatible
+    DIFF // neither equal nor compatible
+}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/Html.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/Html.groovy
new file mode 100644
index 0000000000..c313415618
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/Html.groovy
@@ -0,0 +1,111 @@
+package whelk.WorkMerging
+
+import org.apache.commons.codec.digest.DigestUtils
+
+import static whelk.WorkMerging.FieldStatus.COMPATIBLE
+import static whelk.WorkMerging.FieldStatus.DIFF
+import static whelk.WorkMerging.FieldStatus.EQUAL
+
+class Html {
+    private static String CSS = Html.class.getClassLoader()
+            .getResourceAsStream('merge-works/table.css').getText("UTF-8")
+
+    static final String START = """<html><head>
+                    <meta charset="UTF-8">
+                    <style>$CSS</style>
+                    </head><body>"""
+    static final String END = '</body></html>'
+    static final String HORIZONTAL_RULE = "<hr/><br/>\n"
+
+    static def infoFields = ['reproductionOf', 'instance title', 'work title', 'instance type', 'editionStatement', 'responsibilityStatement', 'encodingLevel', 'publication', 'identifiedBy', 'extent']
+
+    static String clusterTable(Collection<Doc> cluster) { // renders one HTML <table> comparing the docs of a cluster
+        String id = clusterId(cluster.collect { it.doc.shortId })
+        String header = """
+            <tr>
+                <th><a id="${id}" href="#${id}">${id}</a></th>
+                ${cluster.collect { doc -> "<th><a id=\"${doc.doc.shortId}\" href=\"${doc.link()}\">${doc.doc.shortId}</a></th>" }.join('\n')}
+            </tr>
+            <tr>
+                <td></td>
+                ${cluster.collect { doc -> "<td>${doc.mainEntityDisplayTitle()}</td>" }.join('\n')}                                     
+            </tr>
+           """.stripIndent()
+        // The first header cell is a single, closed anchor acting as both link target and self-link (it used to be two unclosed, nested <a> tags).
+        def statuses = WorkComparator.compare(cluster)
+        // A single-document cluster has nothing to be "equal" to, so its EQUAL rows get an empty CSS class.
+        String info = infoFields.collect(fieldRows(cluster, "info")).join('\n')
+        String equal = statuses.get(EQUAL, []).collect(fieldRows(cluster, cluster.size() > 1 ? EQUAL.toString() : "")).join('\n')
+        String compatible = statuses.get(COMPATIBLE, []).collect(fieldRows(cluster, COMPATIBLE.toString())).join('\n')
+        String diff = statuses.get(DIFF, []).collect(fieldRows(cluster, DIFF.toString())).join('\n')
+        // Row order: equal, compatible, diff, then the informational fields.
+        return """
+            <table>
+                ${header}
+                ${equal}
+                ${compatible}
+                ${diff}
+                ${info}
+            </table>
+            <br/><br/>
+        """
+    }
+
+    static String hubTable(List<Collection<Doc>> docs) { // each group: the merged work first, then the docs it was derived from
+        def mergedWorks = docs*.first()
+        def ids = docs.collect { group ->
+            group.drop(1).collectEntries { doc ->
+                [doc.doc.shortId, doc.link()]
+            }
+        }
+        def hubId = clusterId(ids*.keySet().flatten())
+        // The header cell is a single, closed anchor acting as both link target and self-link (it used to be two unclosed, nested <a> tags).
+        String header = """
+            <tr>
+                <th><a id="${hubId}" href="#${hubId}">${hubId}</a></th>
+                ${mergedWorks.collect { "<th></th>" }.join('\n')}
+            </tr>
+           """.stripIndent()
+        // One cell per merged work, linking to every document that work was derived from.
+        String derivedFrom =
+                """
+                    <tr class="info">
+                        <td>_derivedFrom</td>
+                        ${ids.collect { "<td>${it.collect { id, link -> "<a id=\"$id\" href=\"$link\">$id</a>" }.join('\n')}</td>" }.join('\n')}
+                        </tr> 
+                """.stripIndent()
+        // The comparison is made between the merged works themselves, not their source documents.
+        def statuses = WorkComparator.compare(mergedWorks)
+        // With a single merged work there is nothing to be "equal" to, so EQUAL rows get an empty CSS class.
+        String equal = statuses.get(EQUAL, []).collect(fieldRows(mergedWorks, mergedWorks.size() > 1 ? EQUAL.toString() : "")).join('\n')
+        String compatible = statuses.get(COMPATIBLE, []).collect(fieldRows(mergedWorks, COMPATIBLE.toString())).join('\n')
+        String diff = statuses.get(DIFF, []).collect(fieldRows(mergedWorks, DIFF.toString())).join('\n')
+        // Row order: equal, compatible, diff, then the _derivedFrom links.
+        return """
+            <table>
+                ${header}
+                ${equal}
+                ${compatible}
+                ${diff}
+                ${derivedFrom}
+            </table>
+            <br/><br/>
+        """
+    }
+
+    static String clusterId(Collection<String> cluster) { // 12 upper-case hex chars of the MD5 of the smallest id; "" for an empty/null cluster
+        cluster
+                ? DigestUtils.md5Hex(cluster.min()).toUpperCase().substring(0, 12) // min() selects the element sort().first() did, without sorting the caller's list in place
+                : ""
+    }
+
+    private static def fieldRows(Collection<Doc> cluster, String cls) { // returns a closure: field name -> one <tr class=cls> with a cell per doc
+        { field ->
+            """
+            <tr class="${cls}">
+                <td>${field}</td>
+                ${cluster.collect { "<td>${it.getDisplayText(field)}</td>" }.join('\n')}   
+            </tr> """.stripIndent()
+        }
+    }
+}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/Util.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/Util.groovy
new file mode 100644
index 0000000000..07a876cff3
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/Util.groovy
@@ -0,0 +1,306 @@
+package whelk.WorkMerging
+
+import org.apache.commons.lang3.StringUtils
+import whelk.Whelk
+import whelk.util.Unicode
+
+import java.util.regex.Pattern
+
+class Util {
+    static def titleComponents = ['mainTitle', 'titleRemainder', 'subtitle', 'hasPart', 'partNumber', 'partName', 'marc:parallelTitle', 'marc:equalTitle']
+
+    static def titleVariant = ['Title', 'ParallelTitle']
+    // removed 'VariantTitle', 'CoverTitle' since they sometimes contain random generic stuff like "Alibis filmroman", "Kompisböcker för de yngsta"
+
+    static enum Relator {
+        TRANSLATOR('https://id.kb.se/relator/translator'),
+        AUTHOR('https://id.kb.se/relator/author'),
+        ILLUSTRATOR('https://id.kb.se/relator/illustrator'),
+        AUTHOR_OF_INTRO('https://id.kb.se/relator/authorOfIntroduction'),
+        ADAPTER('https://id.kb.se/relator/adapter'),
+        COVER_DESIGNER('https://id.kb.se/relator/coverDesigner'),
+        COMPILER('https://id.kb.se/relator/compiler'),
+        AUTHOR_OF_AFTERWORD('https://id.kb.se/relator/authorOfAfterwordColophonEtc'),
+        PHOTOGRAPHER('https://id.kb.se/relator/photographer'),
+        EDITOR('https://id.kb.se/relator/editor'),
+        UNSPECIFIED_CONTRIBUTOR('https://id.kb.se/relator/unspecifiedContributor'),
+        PRIMARY_RIGHTS_HOLDER('https://id.kb.se/relator/primaryRightsHolder')
+
+        String iri
+
+        private Relator(String iri) {
+            this.iri = iri
+        }
+    }
+
+//    private static Set<String> IGNORED_SUBTITLES = WorkToolJob.class.getClassLoader()
+//            .getResourceAsStream('merge-works/ignored-subtitles.txt')
+//            .readLines().grep().collect(Util.&normalize) as Set
+
+    private static Set<String> GENERIC_TITLES = WorkToolJob.class.getClassLoader()
+            .getResourceAsStream('merge-works/generic-titles.txt')
+            .readLines().grep().collect(Util.&normalize) as Set
+
+    static def noise =
+            [",", '"', "'", '[', ']', ',', '.', '.', ':', ';', '-', '(', ')', ' the ', '-', '–', '+', '!', '?'].collectEntries { [it, ' '] }
+
+
+    static List asList(Object o) { // Groovy-falsy value -> [], a List -> itself, anything else -> [o]
+        !o ? [] : (o instanceof List ? o : [o])
+    }
+
+    /**
+     * Partition a collection based on equality condition
+     *
+     * NOTE: O(n^2)...
+     */
+    static <T> Collection<Collection<T>> partition(Collection<T> collection, Closure matcher) {
+        // Groups are built greedily in input order: an element joins the first
+        // existing group that has any member it matches, otherwise it founds a
+        // new group. Groups are never merged with each other afterwards.
+        List<List<T>> groups = []
+
+        for (T candidate : collection) {
+            List<T> home = groups.find { members -> groupMatches(candidate, members, matcher) }
+            if (home == null) {
+                // no existing group accepts the candidate
+                groups.add([candidate])
+            } else {
+                home.add(candidate)
+            }
+        }
+
+        // groups (and their members) appear in order of first occurrence
+        return groups
+    }
+
+    static <T> boolean groupMatches(T t, List<T> group, Closure matcher) {
+        // True as soon as one existing member matches 't'. The member is
+        // passed to the matcher as the first argument and 't' as the second;
+        // evaluation stops at the first hit.
+        return group.any { member ->
+            matcher(member, t)
+        }
+    }
+
+    static boolean hasGenericTitle(List hasTitle) {
+        hasTitle.any { it['mainTitle'] && normalize((String) it['mainTitle']) in GENERIC_TITLES }
+    }
+
+    static List dropSubTitles(List hasTitle) {
+        hasTitle.collect { t ->
+            def copy = new TreeMap(t)
+            copy.subMap(copy.keySet() - ['subtitle', 'titleRemainder'])
+        }
+    }
+
+//    static List dropGenericSubTitles(List hasTitle) {
+//        hasTitle.collect {
+//            def copy = new TreeMap(it)
+//            if (copy['subtitle'] || copy['titleRemainder']) {
+//                DocumentUtil.traverse(copy) { value, path ->
+//                    if (('subtitle' in path || 'titleRemainder' in path) && value instanceof String && genericSubtitle(value)) {
+//                        new DocumentUtil.Remove()
+//                    }
+//                }
+//            }
+//            copy
+//        }
+//    }
+
+    static List flatTitles(List hasTitle) {
+        dropSubTitles(hasTitle).collect {
+            def title = new TreeMap<>()
+            title['flatTitle'] = normalize(Doc.flatten(it, titleComponents))
+            if (it['@type']) {
+                title['@type'] = it['@type']
+            }
+
+            title
+        }
+    }
+
+//    private static boolean genericSubtitle(String s) {
+//        s = Util.normalize(s)
+//        if (s.startsWith("en ")) {
+//            s = s.substring("en ".length())
+//        }
+//        return s in IGNORED_SUBTITLES
+//    }
+
+    static String normalize(String s) {
+        return Unicode.asciiFold(Unicode.normalizeForSearch(StringUtils.normalizeSpace(" $s ".toLowerCase().replace(noise))))
+    }
+
+    static Object getPathSafe(item, path, defaultTo = null) { // walk 'path' through nested Maps/Collections; defaultTo on any miss
+        for (step in path) {
+            def child = (item instanceof Collection || item instanceof Map) ? item[step] : null
+            if (child == null) {
+                return defaultTo
+            }
+            item = child
+        }
+        return item
+    }
+
+
+    static List<String> getTitleVariants(List hasTitle) {
+        flatTitles(hasTitle)
+                .grep { it['@type'] in titleVariant }
+                .collect { it['flatTitle'] }
+    }
+
+    static String chipString(def thing, Whelk whelk) {
+        if (thing instanceof Integer) {
+            return thing
+        }
+
+        def chips = whelk.jsonld.toChip(thing)
+        if (chips.size() < 2) {
+            chips = thing
+        }
+        if (chips instanceof List) {
+            return chips.collect { valuesString(it) }.sort().join('<br>')
+        }
+        return valuesString(chips)
+    }
+
+    private static String valuesString(def thing) {
+        if (thing instanceof List) {
+            return thing.collect { valuesString(it) }.join(' • ')
+        }
+        if (thing instanceof Map) {
+            return thing.findAll { k, v -> k != '@type' }.values().collect { valuesString(it) }.join(' • ')
+        }
+        return thing.toString()
+    }
+
+    // (docs on some of these levels are normally filtered out before we reach here)
+    static List bestEncodingLevel = [
+            'marc:FullLevel',
+            'marc:FullLevelMaterialNotExamined',
+            'marc:MinimalLevel',
+            'marc:LessThanFullLevelMaterialNotExamined',
+            'marc:CoreLevel',
+            'marc:AbbreviatedLevel',
+            'marc:PartialPreliminaryLevel',
+            'marc:PrepublicationLevel',
+            null
+    ]
+
+    // Return the most common title for the best encodingLevel
+    static Object bestTitle(Collection<Doc> docs) {
+        def isTitle = { it.'@type' == 'Title' }
+        def addSource = { t, d -> t.plus(['source': [d.getMainEntity().subMap('@id')]]) }
+
+        for (def level : bestEncodingLevel) {
+            def titles = docs
+                    .findAll { it.encodingLevel() == level }
+                    .collect { d ->
+                        d.getWork().get('hasTitle')?.findAll(isTitle)
+                                ?: d.getMainEntity().get('hasTitle')?.findResults { isTitle(it) ? addSource(it, d) : null }
+                    }
+                    .grep()
+
+            if (!titles) {
+                continue
+            }
+
+            titles = titles.collect(Util.&dropSubTitles)
+            return partition(titles, { a, b -> a == b }).sort { it.size() }.reverse().first().first()
+        }
+
+        return null
+    }
+
+    static Map<String, List<Tuple2<Relator, Boolean>>> parseRespStatement(String respStatement) {
+        def parsedContributions = [:]
+
+        respStatement.split(';').eachWithIndex { part, i ->
+            // TODO: generalize for other material types
+            parseSwedishFictionContribution(StringUtils.normalizeSpace(part), i == 0).each { name, roles ->
+                parsedContributions
+                        .computeIfAbsent(name, r -> [])
+                        .addAll(roles)
+            }
+        }
+
+        return parsedContributions
+    }
+
+    private static Map<String, List<Tuple2<Relator, Boolean>>> parseSwedishFictionContribution(String contribution, boolean isFirstPart) {
+        def roleToPattern =
+                [
+                        (Relator.TRANSLATOR)         : ~/(bemynd(\w+|\.)? )?öf?v(\.|ers(\.|\p{L}+)?)( (till|från) \p{L}+)?|(till svenskan?|från \p{L}+)|svensk text/,
+                        (Relator.AUTHOR)             : ~/^(text(e[nr])?|skriven|written)/,
+                        (Relator.ILLUSTRATOR)        : ~/\bbild(er)?|ill(\.|ustr(\.|\w+)?)|\bvi(gn|nj)ett(er|ill)?|ritad/,
+                        (Relator.AUTHOR_OF_INTRO)    : ~/förord|inl(edn(\.|ing)|edd)/,
+                        (Relator.COVER_DESIGNER)     : ~/omslag/,
+                        (Relator.AUTHOR_OF_AFTERWORD): ~/efter(ord|skrift)/,
+                        (Relator.PHOTOGRAPHER)       : ~/\bfoto\w*\.?/,
+                        (Relator.EDITOR)             : ~/red(\.(?! av)|aktör(er)?)|\bbearb(\.|\w+)?|återberättad|sammanställ\w*/,
+                ]
+
+        def rolePattern = ~/((?iu)${roleToPattern.values().join('|')})/
+        def followsRolePattern = ~/(:| a[fv]| by) /
+        def initialPattern = ~/\p{Lu}/
+        def namePattern = ~/\p{Lu}:?\p{Ll}+('\p{Ll})?(,? [Jj](r|unior))?/
+        def betweenNamesPattern = ~/-| |\. ?| (de(l| la)?|von|van( de[nr])?|v\.|le|af|du|dos) | [ODdLl]'/
+        def fullNamePattern = ~/(($initialPattern|$namePattern)($betweenNamesPattern)?)*$namePattern/
+        def conjPattern = ~/ (och|&|and) /
+        def roleAfterNamePattern = ~/( ?\(($rolePattern$conjPattern)?$rolePattern\))/
+        def fullContributionPattern = ~/(($rolePattern($conjPattern|\/))*$rolePattern$followsRolePattern)?$fullNamePattern($conjPattern$fullNamePattern)*$roleAfterNamePattern?/
+
+        // Make roles lower case so that they can't be mistaken for names
+        contribution = (contribution =~ rolePattern)*.first()
+                .collectEntries { [it, it.toLowerCase()] }
+                .with { contribution.replace(it) }
+
+        def nameToRoles = [:]
+
+        def matched = (contribution =~ fullContributionPattern)*.first()
+
+        matched.each { m ->
+            // Extract roles from the contribution
+            def roles = roleToPattern
+                    .findAll { k, v -> m =~ /(?iu)$v/ }
+                    .with {
+                        it.isEmpty() && contribution =~ /.+$followsRolePattern/
+                                ? [new Tuple2(Relator.UNSPECIFIED_CONTRIBUTOR, isFirstPart)]
+                                : it.collect { role, pattern -> new Tuple2(role, isFirstPart) }
+                    }
+
+            // Author should be the role if first part of respStatement (before ';') and no role seems to be stated
+            if (roles.isEmpty() && isFirstPart) {
+                roles << new Tuple2(Relator.AUTHOR, isFirstPart)
+            }
+
+            // Extract names from the contribution
+            def names = parseNames(fullNamePattern, conjPattern, m)
+
+            // Assign the roles to each name
+            nameToRoles.putAll(names.collectEntries { [it, roles] })
+        }
+
+        return nameToRoles
+    }
+
+    private static List<String> parseNames(Pattern namePattern, Pattern conjPattern, String s) {
+        def names = []
+        // Collect every match of namePattern in 's', in order of appearance.
+        (s =~ namePattern).each {
+            def name = it.first()
+            // Handle the case of "Jan och Maria Larsson"
+            def previousName = names.isEmpty() ? null : names.last()
+            // Names are literal text: quote them so that e.g. '.' in a name is not treated as a regex wildcard.
+            if (previousName?.split()?.size() == 1 && s =~ /${Pattern.quote(previousName)}$conjPattern${Pattern.quote(name)}/) {
+                def nameParts = name.split()
+                if (nameParts.size() > 1) {
+                    names[-1] += " ${nameParts.last()}" // the single-word previous name borrows the last word of this one
+                }
+            }
+            names << name
+        }
+        return names
+    }
+}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/WorkComparator.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/WorkComparator.groovy
new file mode 100644
index 0000000000..faa369e9d9
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/WorkComparator.groovy
@@ -0,0 +1,136 @@
+package whelk.WorkMerging
+
+import whelk.WorkMerging.compare.Classification
+import whelk.WorkMerging.compare.SameOrEmpty
+import whelk.WorkMerging.compare.Default
+import whelk.WorkMerging.compare.Extent
+import whelk.WorkMerging.compare.FieldHandler
+import whelk.WorkMerging.compare.GenreForm
+import whelk.WorkMerging.compare.StuffSet
+import whelk.WorkMerging.compare.Subject
+import whelk.WorkMerging.compare.TranslationOf
+import whelk.WorkMerging.compare.ValuePicker
+import whelk.WorkMerging.compare.WorkTitle
+
+import static whelk.WorkMerging.Util.bestTitle
+
+class WorkComparator {
+    Set<String> fields
+    DocumentComparator c = new DocumentComparator()
+
+    Map<String, FieldHandler> comparators = [
+            'classification'  : new Classification(),
+            'contentType'     : new SameOrEmpty('https://id.kb.se/term/rda/Text'),
+            'genreForm'       : new GenreForm(),
+            'hasTitle'        : new WorkTitle(),
+            'intendedAudience': new SameOrEmpty('https://id.kb.se/marc/Juvenile'),
+            '_numPages'       : new Extent(),
+            'subject'         : new Subject(),
+            'summary'         : new StuffSet(),
+            'translationOf'   : new TranslationOf(),
+    ]
+
+    static FieldHandler DEFAULT = new Default()
+
+    WorkComparator(Set<String> fields) {
+        this.fields = new HashSet<>(fields)
+    }
+
+    boolean sameWork(Doc a, Doc b) { // same work iff every compared field is EQUAL or COMPATIBLE (constants now qualified; they are not imported in this file)
+        fields.every { compare(a, b, it) in [FieldStatus.EQUAL, FieldStatus.COMPATIBLE] }
+    }
+
+    FieldStatus compare(Doc a, Doc b, String field) {
+        Object oa = a.getWork().get(field)
+        Object ob = b.getWork().get(field)
+
+        if (oa == null && ob == null) {
+            return FieldStatus.EQUAL
+        }
+
+        compareExact(oa, ob, field) == FieldStatus.EQUAL
+                ? FieldStatus.EQUAL
+                : compareDiff(a, b, field)
+    }
+
+    Map merge(Collection<Doc> docs) {
+        // A lone document contributes a copy of its work as-is; several are merged field by field.
+        Map merged
+        if (docs.size() <= 1) {
+            merged = docs[0].workCopy()
+        } else {
+            merged = [:]
+            for (String field : fields) {
+                FieldHandler handler = comparators.getOrDefault(field, DEFAULT)
+                // A ValuePicker chooses from the documents as a whole; any other handler folds the values pairwise.
+                def picked = (handler instanceof ValuePicker) ? handler.pick(docs) : mergeField(field, handler, docs)
+                if (picked) {
+                    merged[field] = picked
+                }
+            }
+        }
+
+        // Fall back to Util.bestTitle when the result has no title of its own.
+        if (!merged['hasTitle']) {
+            def fallbackTitle = bestTitle(docs)
+            if (fallbackTitle) {
+                merged['hasTitle'] = fallbackTitle
+            }
+        }
+
+        return merged
+    }
+
+    // TODO: preserve order? e.g. subject
+    private Object mergeField(String field, FieldHandler h, Collection<Doc> docs) {
+        // Left fold over the documents in order: seed with the first
+        // document's value, then merge in each remaining document's value.
+        def seed = docs.first().getWork().get(field)
+        return docs.drop(1).inject(seed) { merged, doc ->
+            h.merge(merged, doc.getWork().get(field))
+        }
+    }
+
+    private FieldStatus compareDiff(Doc a, Doc b, String field) {
+        comparators.getOrDefault(field, DEFAULT).isCompatible(a.getWork().get(field), b.getWork().get(field))
+                ? FieldStatus.COMPATIBLE
+                : FieldStatus.DIFF
+    }
+
+    private FieldStatus compareExact(Object oa, Object ob, String field) {
+        c.isEqual([(field): oa], [(field): ob]) ? FieldStatus.EQUAL : FieldStatus.DIFF
+    }
+
+    static Map<FieldStatus, List<String>> compare(Collection<Doc> cluster) {
+        WorkComparator c = new WorkComparator(allFields(cluster))
+
+        Map<FieldStatus, List<String>> result = [:]
+        c.fieldStatuses(cluster).each { f, s -> result.get(s, []) << f }
+        return result
+    }
+
+    static Set<String> allFields(Collection<Doc> cluster) {
+        Set<String> fields = new HashSet<>()
+        cluster.each { fields.addAll(it.getWork().keySet()) }
+        return fields - 'summary' // - 'summary' only temporary, remove when summaries have been moved to instance (LXL-3303)
+    }
+
+    Map<String, FieldStatus> fieldStatuses(Collection<Doc> cluster) {
+        fields.collectEntries { [it, fieldStatus(cluster, it)] }
+    }
+
+    FieldStatus fieldStatus(Collection<Doc> cluster, String field) { // aggregate status of 'field' over the whole cluster
+        boolean anyCompat = false
+        [cluster, cluster].combinations().findResult { List combination -> // every pair from cluster x cluster, self-pairs included
+            Doc a = combination.first()
+            Doc b = combination.last()
+            // Remember COMPATIBLE pairs, but keep looking: a later DIFF still wins.
+            def c = compare(a, b, field)
+            if (c == FieldStatus.COMPATIBLE) {
+                anyCompat = true
+            }
+            c == FieldStatus.DIFF ? c : null // findResult stops at the first non-null result, i.e. the first DIFF
+        } ?: (anyCompat ? FieldStatus.COMPATIBLE : FieldStatus.EQUAL)
+    }
+
+}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging.java b/whelk-core/src/main/groovy/whelk/WorkMerging/WorkMerging.java
similarity index 98%
rename from whelk-core/src/main/groovy/whelk/WorkMerging.java
rename to whelk-core/src/main/groovy/whelk/WorkMerging/WorkMerging.java
index 6d9fb2e6d0..99a2105145 100644
--- a/whelk-core/src/main/groovy/whelk/WorkMerging.java
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/WorkMerging.java
@@ -1,4 +1,8 @@
-package whelk;
+package whelk.WorkMerging;
+
+import whelk.Document;
+import whelk.IdGenerator;
+import whelk.Whelk;
 
 import java.util.*;
 
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/WorkToolJob.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/WorkToolJob.groovy
new file mode 100644
index 0000000000..d30deeb9ca
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/WorkToolJob.groovy
@@ -0,0 +1,797 @@
+package whelk.WorkMerging
+
+
+import whelk.Document
+import whelk.IdGenerator
+import whelk.JsonLd
+import whelk.Whelk
+import whelk.exception.WhelkRuntimeException
+import whelk.util.LegacyIntegrationTools
+import whelk.util.Statistics
+
+import java.text.SimpleDateFormat
+import java.util.concurrent.ExecutorService
+import java.util.concurrent.Executors
+import java.util.concurrent.TimeUnit
+import java.util.concurrent.atomic.AtomicInteger
+import java.util.function.Function
+
+import static whelk.WorkMerging.FieldStatus.DIFF
+
+import static whelk.WorkMerging.Util.asList
+import static whelk.WorkMerging.Util.chipString
+import static whelk.WorkMerging.Util.getPathSafe
+import static whelk.WorkMerging.Util.normalize
+import static whelk.WorkMerging.Util.partition
+import static whelk.WorkMerging.Util.parseRespStatement
+import static whelk.WorkMerging.Util.Relator
+
+class WorkToolJob {
+    Whelk whelk
+    Statistics statistics
+    File clusters
+
+    String date = new SimpleDateFormat('yyyyMMdd-HHmmss').format(new Date())
+    String jobId = IdGenerator.generate()
+    File reportDir = new File("reports/$date/merged-works")
+
+    String changedIn = "xl"
+    String changedBy = "SEK"
+    String generationProcess = 'https://libris.kb.se/sys/merge-works'
+    boolean dryRun = true
+    boolean skipIndex = false
+    boolean loud = false
+    boolean verbose = false
+
+    WorkToolJob(File clusters) {
+        this.clusters = clusters
+
+        this.whelk = Whelk.createLoadedSearchWhelk('secret', true)
+        this.statistics = new Statistics()
+    }
+
+    public static Closure qualityMonographs = { Doc doc ->
+        (doc.isText()
+                && doc.isMonograph()
+                && !doc.hasPart()
+                && (doc.encodingLevel() != 'marc:PartialPreliminaryLevel' && doc.encodingLevel() != 'marc:PrepublicationLevel'))
+                && !doc.hasRelationshipWithContribution()
+    }
+
+    void show() {
+        println(Html.START)
+        run({ cluster ->
+            return {
+                try {
+                    Collection<Collection<Doc>> docs = titleClusters(cluster)
+
+                    if (docs.isEmpty() || docs.size() == 1 && docs.first().size() == 1) {
+                        return
+                    }
+
+                    println(docs
+                            .collect { it.sort { a, b -> a.getWork()['@type'] <=> b.getWork()['@type'] } }
+                            .collect { it.sort { it.numPages() } }
+                            .collect { Html.clusterTable(it) }
+                            .join('') + Html.HORIZONTAL_RULE
+                    )
+                }
+                catch (NoWorkException e) {
+                    System.err.println(e.getMessage())
+                }
+                catch (Exception e) {
+                    System.err.println(e.getMessage())
+                    e.printStackTrace(System.err)
+                }
+            }
+        })
+        println(Html.END)
+    }
+
+    void showWorks() {
+        println(Html.START)
+        run({ cluster ->
+            return {
+                try {
+                    println(mergedWorks(titleClusters(cluster)).findAll { it.derivedFrom.size() > 1 }
+                            .collect { [new Doc(whelk, it.work)] + it.derivedFrom }
+                            .collect { Html.clusterTable(it) }
+                            .join('') + Html.HORIZONTAL_RULE
+                    )
+                }
+                catch (Exception e) {
+                    System.err.println(e.getMessage())
+                    e.printStackTrace(System.err)
+                }
+            }
+        })
+        println(Html.END)
+    }
+
+    void showHubs() {
+        println(Html.START)
+        run({ cluster ->
+            return {
+                try {
+                    def hub = mergedWorks(titleClusters(cluster))
+                            .collect { [new Doc(whelk, it.work)] + it.derivedFrom }
+                    if (hub.size() > 1) {
+                        println(Html.hubTable(hub) + Html.HORIZONTAL_RULE)
+                    }
+                }
+                catch (Exception e) {
+                    System.err.println(e.getMessage())
+                    e.printStackTrace(System.err)
+                }
+            }
+        })
+        println(Html.END)
+    }
+
+    void merge() {
+        def s = statistics.printOnShutdown()
+        reportDir.mkdirs()
+
+        run({ cluster ->
+            return {
+                def titles = titleClusters(cluster)
+                def works = mergedWorks(titles)
+
+                works.each {
+                    if (it.derivedFrom.size() > 1) {
+                        store(it)
+                    }
+                }
+
+                String report = htmlReport(titles, works)
+
+                new File(reportDir, "${Html.clusterId(cluster)}.html") << report
+                works.each {
+                    s.increment('num derivedFrom', "${it.derivedFrom.size()}", it.work.shortId)
+                    new File(reportDir, "${it.work.shortId}.html") << report
+                }
+            }
+        })
+    }
+
+    void revert() { // per cluster: restore documents to a version without a work link, then remove the linked works
+        run({ cluster ->
+            return {
+                def docs = cluster.collect(whelk.&getDocument).grep()
+                // Work ids the documents currently point to (null is added for a document without one).
+                Set<String> works = []
+                // Replace each document's data with the first version, in reversed loadAllVersions order, that has no value at workIdPath.
+                docs.each { Document d ->
+                    def sum = d.getChecksum(whelk.jsonld)
+                    works << getPathSafe(d.data, d.workIdPath)
+                    def revertTo = whelk.storage.loadAllVersions(d.shortId)
+                            .reverse()
+                            .find { v -> getPathSafe(v.data, v.workIdPath) == null }
+                    d.data = revertTo.data // NOTE(review): find() yields null if every version has a work id, which would NPE here - confirm that cannot happen
+                    d.setGenerationDate(new Date())
+                    d.setGenerationProcess(generationProcess)
+                    whelk.storeAtomicUpdate(d, !loud, changedIn, changedBy, sum)
+                }
+                // grep() drops the null entries before the works are removed.
+                works.grep().each {
+                    def shortId = it.split("[#/]")[-2] // second-to-last segment when splitting the id on '#' and '/'
+                    whelk.remove(shortId, changedIn, changedBy)
+                }
+            }
+        })
+    }
+
+    String htmlReport(Collection<Collection<Doc>> titleClusters, Collection<MergedWork> works) {
+        if (titleClusters.isEmpty() || titleClusters.size() == 1 && titleClusters.first().size() == 1) {
+            return ""
+        }
+
+        StringBuilder s = new StringBuilder()
+
+        s.append(Html.START)
+        s.append("<h1>Title cluster(s)</h1>")
+        titleClusters.each { it.each { it.addComparisonProps() } }
+
+        titleClusters
+                .collect { it.sort { a, b -> a.getWork()['@type'] <=> b.getWork()['@type'] } }
+                .collect { it.sort { it.numPages() } }
+                .each {
+                    s.append(Html.clusterTable(it))
+                    s.append(Html.HORIZONTAL_RULE)
+                }
+        titleClusters.each { it.each { it.removeComparisonProps() } }
+
+        s.append("<h1>Extracted works</h1>")
+        works.collect { [new Doc(whelk, it.work)] + it.derivedFrom }
+                .each { s.append(Html.clusterTable(it)) }
+
+        s.append(Html.END)
+        return s.toString()
+    }
+
+    class MergedWork {
+        Document work
+        Collection<Doc> derivedFrom
+    }
+
+    private Document buildWorkDocument(Map workData) {
+        String workId = IdGenerator.generate()
+
+        workData['@id'] = "TEMPID#it"
+        Document d = new Document([
+                "@graph": [
+                        [
+                                "@id"          : "TEMPID",
+                                "@type"        : "Record",
+                                "mainEntity"   : ["@id": "TEMPID#it"],
+                                "technicalNote": [[
+                                                          "@type"  : "TechnicalNote",
+                                                          "hasNote": [[
+                                                                              "@type": "Note",
+                                                                              "label": ["Maskinellt utbrutet verk... TODO"]
+                                                                      ]],
+                                                          "uri"    : ["http://xlbuild.libris.kb.se/works/$date/merged-works/${workId}.html".toString()]
+
+                                                  ]
+                                ]],
+                        workData
+                ]
+        ])
+
+        d.setGenerationDate(new Date())
+        d.setGenerationProcess(generationProcess)
+        d.deepReplaceId(Document.BASE_URI.toString() + workId)
+        return d
+    }
+
+    /**
+     * Persists a merged work and points every doc it was derived from at it.
+     * Does nothing when 'dryRun' is set.
+     *
+     * @param work the work to create together with the docs to re-link
+     * @throws WhelkRuntimeException if the work document could not be created
+     */
+    private void store(MergedWork work) {
+        if (dryRun) {
+            return
+        }
+
+        whelk.setSkipIndex(skipIndex)
+
+        def collection = LegacyIntegrationTools.determineLegacyCollection(work.work, whelk.getJsonld())
+        def created = whelk.createDocument(work.work, changedIn, changedBy, collection, false)
+        if (!created) {
+            throw new WhelkRuntimeException("Could not store new work: ${work.work.shortId}")
+        }
+
+        String workIri = work.work.thingIdentifiers.first()
+        def workLink = { [(JsonLd.ID_KEY): workIri] }
+
+        def instances = work.derivedFrom.collect { it.ogDoc }
+        for (instance in instances) {
+            // Checksum is taken before modification and passed along to the atomic update
+            def checksumBefore = instance.getChecksum(whelk.jsonld)
+            instance.data[JsonLd.GRAPH_KEY][1]['instanceOf'] = workLink()
+            instance.setGenerationDate(new Date())
+            instance.setGenerationProcess(generationProcess)
+            whelk.storeAtomicUpdate(instance, !loud, changedIn, changedBy, checksumBefore)
+        }
+    }
+
+    /**
+     * Splits each title cluster into groups that the WorkComparator considers the same work
+     * and builds one merged work document per group.
+     *
+     * @param titleClusters clusters of docs
+     * @return one MergedWork per same-work group, across all title clusters
+     */
+    private Collection<MergedWork> mergedWorks(Collection<Collection> titleClusters) {
+        def merged = []
+        for (titleCluster in titleClusters) {
+            titleCluster.sort { it.numPages() }
+            WorkComparator comparator = new WorkComparator(WorkComparator.allFields(titleCluster))
+
+            def sameWorkGroups = partition(titleCluster, { Doc a, Doc b -> comparator.sameWork(a, b) })
+
+            // Strip the comparison props from every group before any merging takes place
+            sameWorkGroups.each { group -> group.each { doc -> doc.removeComparisonProps() } }
+
+            merged.addAll(sameWorkGroups.collect { group ->
+                new MergedWork(work: buildWorkDocument(comparator.merge(group)), derivedFrom: group)
+            })
+        }
+
+        return merged
+    }
+
+
+    /**
+     * Prints, per cluster, the subtitle of the first title of each instance (one per line),
+     * followed by an empty line. Clusters without any subtitle print nothing.
+     */
+    void subTitles() {
+        statistics.printOnShutdown(10)
+        run({ cluster ->
+            return {
+                def docs = cluster.collect { whelk.getDocument(it) }
+                def subtitles = docs
+                        .collect { getPathSafe(it.data, ['@graph', 1, 'hasTitle', 0, 'subtitle']) }
+                        .grep()
+                String titles = subtitles.join('\n')
+
+                if (titles.isBlank()) {
+                    return
+                }
+                println(titles + '\n')
+            }
+        })
+    }
+
+    /**
+     * Prints, per cluster, one "shortId TAB value" line per document, where value is the
+     * given property of the second graph node, followed by an empty line.
+     *
+     * @param field name of the property to print
+     */
+    void printInstanceValue(String field) {
+        run({ cluster ->
+            return {
+                def docs = cluster.collect { whelk.getDocument(it) }
+                def rows = docs.collect { doc ->
+                    "${doc.shortId}\t${getPathSafe(doc.data, ['@graph', 1, field])}"
+                }
+                String values = rows.join('\n')
+
+                println(values + '\n')
+            }
+        })
+    }
+
+    /**
+     * Prints the short ids (tab separated) of title clusters whose members do not differ in
+     * 'contribution' but whose genre/form display texts contain both
+     * marc/FictionNotFurtherSpecified and marc/NotFictionNotFurtherSpecified.
+     */
+    void fictionNotFiction() {
+        run({ cluster ->
+            return {
+                Collection<Collection<Doc>> titleClusters = titleClusters(cluster)
+
+                for (titleCluster in titleClusters) {
+                    if (titleCluster.size() > 1) {
+                        // Compare the docs of this title cluster, not the raw list of ids
+                        def statuses = WorkComparator.compare(titleCluster)
+                        if (!statuses[DIFF].contains('contribution')) {
+                            String gf = titleCluster.collect { it.getDisplayText('genreForm') }.join(' ')
+                            if (gf.contains('marc/FictionNotFurtherSpecified') && gf.contains('marc/NotFictionNotFurtherSpecified')) {
+                                println(titleCluster.collect { it.getDoc().shortId }.join('\t'))
+                            }
+                        }
+                    }
+                }
+            }
+        })
+    }
+
+    /**
+     * Prints the short ids (tab separated) of the quality monographs in each cluster that are
+     * in Swedish only and not drama, provided at least one of them is fiction and none of them
+     * is marked as not fiction.
+     */
+    void swedishFiction() {
+        def swedishOnly = ['https://id.kb.se/language/swe']
+        def swedish = { Doc doc ->
+            def languageIds = Util.asList(doc.getWork()['language']).collect { lang -> lang['@id'] }
+            languageIds == swedishOnly
+        }
+
+        run({ cluster ->
+            return {
+                def candidates = loadDocs(cluster)
+                        .findAll(qualityMonographs)
+                        .findAll(swedish)
+                        .findAll { d -> !d.isDrama() }
+
+                if (!candidates.any { it.isFiction() }) {
+                    return
+                }
+                if (candidates.any { it.isNotFiction() }) {
+                    return
+                }
+                println(candidates.collect { it.doc.shortId }.join('\t'))
+            }
+        })
+    }
+
+    /**
+     * Prints (tab separated) every cluster for which the predicate, applied to the loaded docs
+     * of the cluster, is truthy.
+     *
+     * @param predicate called with the loaded docs of each cluster
+     */
+    void filterClusters(Closure<Collection<Doc>> predicate) {
+        run({ cluster ->
+            return {
+                def verdict = predicate(loadDocs(cluster))
+                if (!verdict) {
+                    return
+                }
+                println(cluster.join('\t'))
+            }
+        })
+    }
+
+    /**
+     * Prints, per cluster, the short ids (tab separated) of the docs accepted by the predicate.
+     * Clusters where no doc is accepted print nothing.
+     *
+     * @param predicate called with each loaded doc
+     */
+    void filterDocs(Closure<Doc> predicate) {
+        run({ cluster ->
+            return {
+                def accepted = loadDocs(cluster).findAll(predicate)
+                if (accepted.size() <= 0) {
+                    return
+                }
+                println(accepted.collect { it.doc.shortId }.join('\t'))
+            }
+        })
+    }
+
+    /**
+     * Filters clusters containing translations and prints what remains as tab separated short ids.
+     *
+     * If any doc in the cluster has a translator, translations without translator are removed.
+     * If no doc has a translator, the whole cluster is dropped unless all docs have the same
+     * number of pages. Clusters without translations are printed unchanged.
+     */
+    void translationNoTranslator() {
+        run({ cluster ->
+            return {
+                def docs = loadDocs(cluster)
+
+                if (docs && docs.any { it.isTranslation() }) {
+                    if (docs.any { it.hasTranslator() }) {
+                        docs = docs.findAll { !it.isTranslationWithoutTranslator() }
+                    } else {
+                        int pages = docs.first().numPages()
+                        boolean samePageCount = docs.every { it.numPages() == pages }
+                        if (!samePageCount) {
+                            return // drop cluster
+                        }
+                    }
+                }
+
+                if (docs.size() > 0) {
+                    println(docs.collect { it.doc.shortId }.join('\t'))
+                }
+            }
+        })
+    }
+
+    /**
+     * Prints every title cluster with more than one member as a line of tab separated short ids.
+     */
+    void outputTitleClusters() {
+        run({ cluster ->
+            return {
+                for (group in titleClusters(cluster)) {
+                    if (group.size() > 1) {
+                        println(group.collect { it.doc.shortId }.join('\t'))
+                    }
+                }
+            }
+        })
+    }
+
+    /**
+     * Adds the primary rights holder role to illustrator contributions when some document in
+     * the same cluster has a contribution with the same agent carrying both the illustrator and
+     * the primary rights holder role.
+     *
+     * Primary contributions, contributions that already have the role and contributions without
+     * the illustrator role are left untouched. Changed documents are stored unless 'dryRun' is set.
+     */
+    void add9pu() {
+        statistics.printOnShutdown()
+        run({ cluster ->
+            return {
+                statistics.increment('add 9pu', 'clusters checked')
+                def docs = cluster
+                        .collect(whelk.&getDocument)
+                        .findAll()
+                        .collect { [doc: it, checksum: it.getChecksum(whelk.jsonld), changed: false] }
+
+                def ill = ['@id': Relator.ILLUSTRATOR.iri]
+                def pu = ['@id': Relator.PRIMARY_RIGHTS_HOLDER.iri]
+                def path = ['@graph', 1, 'instanceOf', 'contribution']
+
+                docs.each {
+                    Document d = it.doc
+
+                    statistics.increment('add 9pu', 'docs checked')
+
+                    getPathSafe(d.data, path, []).each { Map c ->
+                        def r = asList(c.role)
+
+                        if (pu in r || !(ill in r) || c.'@type' == 'PrimaryContribution')
+                            return
+
+                        for (Map other : docs) {
+                            Document od = other.doc
+
+                            // any() stops at the first match, so the role is added at most once.
+                            // (A 'return' inside each() only skips to the next element, which could
+                            // append the role repeatedly, e.g. when c later matches itself.)
+                            boolean found9pu = getPathSafe(od.data, path, []).any { Map oc ->
+                                asList(c.agent) == asList(oc.agent) && asList(oc.role).containsAll([ill, pu])
+                            }
+
+                            if (found9pu) {
+                                c.role = asList(c.role) + pu
+                                statistics.increment('add 9pu', "9pu added")
+                                if (verbose) {
+                                    println("${d.shortId} <- ${od.shortId}")
+                                }
+                                println(c)
+                                it.changed = true
+                                break
+                            }
+                        }
+                    }
+                }
+
+                docs.each {
+                    if (!dryRun && it.changed) {
+                        Document d = it.doc
+                        d.setGenerationDate(new Date())
+                        d.setGenerationProcess(generationProcess)
+                        whelk.storeAtomicUpdate(d, !loud, changedIn, changedBy, it.checksum)
+                    }
+                }
+            }
+        })
+    }
+
+    /**
+     * Uses the responsibility statement of each document to complete its work contributions.
+     *
+     * Step 1: for contributions already present in the document whose agent name matches a name
+     * in the statement, roles found in the statement are added to the contribution.
+     * Step 2: for names in the statement that matched no local contribution, a matching
+     * contribution is looked up in the documents of the same cluster and copied.
+     *
+     * Changed documents are stored unless 'dryRun' is set.
+     */
+    void fetchContributionFromRespStatement() {
+        // Returns the second graph node of the document with the given main id, or null if not found
+        def loadThingByIri = { String iri ->
+            // TODO: fix whelk, add load by IRI method
+            whelk.storage.loadDocumentByMainId(iri)?.with { doc ->
+                return (Map) doc.data['@graph'][1]
+            }
+        }
+
+        // Linked things are loaded, local things are used as they are
+        def loadIfLink = { it['@id'] ? loadThingByIri(it['@id']) : it }
+
+        statistics.printOnShutdown()
+        run({ cluster ->
+            return {
+                statistics.increment('fetch contribution from respStatement', 'clusters checked')
+                // Checksums are taken before any modification and passed along when storing
+                def docs = cluster
+                        .collect(whelk.&getDocument)
+                        .findAll()
+                        .collect { [doc: it, checksum: it.getChecksum(whelk.jsonld), changed: false] }
+
+                docs.each {
+                    Document d = it.doc
+                    def respStatement = getPathSafe(d.data, ['@graph', 1, 'responsibilityStatement'])
+                    if (!respStatement)
+                        return
+
+                    statistics.increment('fetch contribution from respStatement', 'docs checked')
+
+                    // NOTE(review): used below as a map from name to a list of
+                    // (Relator, isFirstStmtPart) tuples - confirm against parseRespStatement
+                    def contributionsInRespStmt = parseRespStatement(respStatement)
+                    def contribution = getPathSafe(d.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
+
+                    // Step 1: add roles from the statement to existing contributions
+                    contribution.each { Map c ->
+                        asList(c.agent).each { a ->
+                            def matchedOnName = contributionsInRespStmt.find { n, r ->
+                                nameMatch(n, loadIfLink(a))
+                            }
+
+                            if (!matchedOnName)
+                                return
+
+                            // Contributor found locally, omit from further search
+                            contributionsInRespStmt.remove(matchedOnName.key)
+
+                            // Never add the unspecified contributor role. Author from the first part
+                            // of the statement is only added to primary contributions.
+                            def dontAdd = { Relator relator, boolean isFirstStmtPart ->
+                                relator == Relator.UNSPECIFIED_CONTRIBUTOR
+                                        || isFirstStmtPart && relator == Relator.AUTHOR
+                                        && c.'@type' != 'PrimaryContribution'
+                            }
+
+                            def rolesInRespStatement = matchedOnName.value
+                                    .findResults { dontAdd(it) ? null : it.getV1() }
+
+                            if (rolesInRespStatement.isEmpty())
+                                return
+
+                            def rolesInContribution = asList(c.role).findAll { it.'@id' != Relator.UNSPECIFIED_CONTRIBUTOR.iri }
+
+                            // Replace Adapter with Editor
+                            // (an Editor role from the statement is consumed when it replaces an Adapter role)
+                            it.changed |= rolesInRespStatement.removeAll { r ->
+                                r == Relator.EDITOR && rolesInContribution.findIndexOf {
+                                    it.'@id' == Relator.ADAPTER.iri
+                                }.with {
+                                    if (it == -1) {
+                                        return false
+                                    } else {
+                                        rolesInContribution[it]['@id'] = Relator.EDITOR.iri
+                                        return true
+                                    }
+                                }
+                            }
+
+                            // Only add roles when the statement has more roles than the contribution
+                            if (rolesInRespStatement.size() <= rolesInContribution.size())
+                                return
+
+                            rolesInRespStatement.each { r ->
+                                def idLink = ['@id': r.iri]
+                                if (!(idLink in rolesInContribution)) {
+                                    rolesInContribution << idLink
+                                    it.changed = true
+                                    def roleShort = r.iri.split('/').last()
+                                    statistics.increment('fetch contribution from respStatement', "$roleShort roles specified")
+                                    if (verbose) {
+                                        println("${chipString(c, whelk)} (${d.shortId}) <- $roleShort")
+                                    }
+                                }
+                            }
+
+                            c.role = rolesInContribution
+                        }
+                    }
+
+                    // Turns a list of role tuples into a list of role links, leaving out the unspecified contributor
+                    def comparable = {
+                        it*.getV1().findResults { Relator r ->
+                            r != Relator.UNSPECIFIED_CONTRIBUTOR
+                                    ? ['@id': r.iri]
+                                    : null
+                        }
+                    }
+
+                    // Step 2: names left in the statement were not found locally, look in the cluster.
+                    // A contribution qualifies if an agent matches the name (and its description does not
+                    // match "pseud"), it has all the roles from the statement, and the position of this
+                    // document's encoding level in Util.bestEncodingLevel is not after the other document's.
+                    contributionsInRespStmt.each { name, roles ->
+                        for (Map other : docs) {
+                            Document od = other.doc
+                            def matched = getPathSafe(od.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
+                                    .find { Map c ->
+                                        asList(c.agent).any { a ->
+                                            loadIfLink(a).with { nameMatch(name, it) && !(it.description =~ /(?i)pseud/) }
+                                                    && comparable(roles).with { r -> !r.isEmpty() && asList(c.role).containsAll(r) }
+                                                    && Util.bestEncodingLevel.indexOf(d.getEncodingLevel()) <= Util.bestEncodingLevel.indexOf(od.getEncodingLevel())
+                                        }
+                                    }
+                            if (matched) {
+                                contribution << matched
+                                roles.each {
+                                    def roleShort = it.getV1().iri.split('/').last()
+                                    statistics.increment('fetch contribution from respStatement', "$roleShort found in cluster")
+                                }
+                                if (verbose) {
+                                    println("${d.shortId} <- ${chipString(matched, whelk)} (${od.shortId})")
+                                }
+                                it.changed = true
+                                break
+                            }
+                        }
+                    }
+                }
+
+                docs.each {
+                    if (!dryRun && it.changed) {
+                        Document d = it.doc
+                        d.setGenerationDate(new Date())
+                        d.setGenerationProcess(generationProcess)
+                        whelk.storeAtomicUpdate(d, !loud, changedIn, changedBy, it.checksum)
+                    }
+                }
+            }
+        }
+
+        )
+    }
+
+    /**
+     * Links local (unlinked) contribution agents to linked agents found in the same cluster and
+     * adds the author role to primary contributions lacking a role.
+     *
+     * Per cluster:
+     * 1. Collect all linked agents in the cluster together with the roles they appear with.
+     * 2. Replace each local agent with a link if it matches a collected agent (name and years)
+     *    whose roles cover the roles of the contribution.
+     * 3. Collect the agents of primary contributions that have the author role.
+     * 4. Give primary contributions without role the author role if their agent matches one of
+     *    the agents collected in step 3.
+     *
+     * Changed documents are stored unless 'dryRun' is set.
+     */
+    void linkContribution() {
+        // Returns the second graph node of the document with the given main id, or null if not found
+        def loadThingByIri = { String iri ->
+            // TODO: fix whelk, add load by IRI method
+            whelk.storage.loadDocumentByMainId(iri)?.with { doc ->
+                return (Map) doc.data['@graph'][1]
+            }
+        }
+
+        // Linked things are loaded, local things are used as they are
+        def loadIfLink = { it['@id'] ? loadThingByIri(it['@id']) : it }
+
+        statistics.printOnShutdown()
+        run({ cluster ->
+            return {
+                statistics.increment('link contribution', 'clusters checked')
+                // TODO: check work language?
+                // findAll() drops ids that could not be loaded, as the other cluster jobs do.
+                // Without it a single missing document fails the whole cluster with an NPE.
+                def docs = cluster
+                        .collect(whelk.&getDocument)
+                        .findAll()
+                        .collect { [doc: it, checksum: it.getChecksum(whelk.jsonld), changed: false] }
+
+                // Step 1: linked agents in the cluster
+                List<Map> linked = []
+                docs.each { d ->
+                    def contribution = getPathSafe(d.doc.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
+                    contribution.each { Map c ->
+                        if (c.agent && c.agent['@id']) {
+                            loadThingByIri(c.agent['@id'])?.with { Map agent ->
+                                agent.roles = asList(c.role)
+                                linked << agent
+                            }
+                        }
+                    }
+                    statistics.increment('link contribution', 'docs checked')
+                }
+
+                // Step 2: link local agents
+                docs.each {
+                    Document d = it.doc
+                    def contribution = getPathSafe(d.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
+                    contribution.each { Map c ->
+                        if (c.agent && !c.agent['@id']) {
+                            def l = linked.find {
+                                agentMatches(c.agent, it) && (!c.role || it.roles.containsAll(c.role))
+                            }
+                            if (l) {
+                                println("${d.shortId} ${chipString(c, whelk)} --> ${chipString(l, whelk)}")
+                                c.agent = ['@id': l['@id']]
+                                it.changed = true
+                                statistics.increment('link contribution', 'agents linked')
+                            } else if (verbose) {
+                                println("${d.shortId} NO MATCH: ${chipString(c, whelk)} ??? ${linked.collect { chipString(it, whelk) }}")
+                            }
+                        }
+                    }
+                }
+
+                // Step 3: agents of primary contributions with author role
+                List<Map> primaryAutAgents = []
+                docs.each {
+                    def contribution = getPathSafe(it.doc.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
+                    contribution.each {
+                        if (it['@type'] == 'PrimaryContribution' && it['role'] == ['@id': 'https://id.kb.se/relator/author'] && it['agent']) {
+                            Map agent = loadIfLink(it['agent'])
+                            if (agent) {
+                                primaryAutAgents << agent
+                            }
+                        }
+                    }
+                }
+
+                // Step 4: add author role to primary contributions without role
+                docs.each {
+                    Document d = it.doc
+                    def contribution = getPathSafe(d.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
+                    contribution.each { Map c ->
+                        if (c['@type'] == 'PrimaryContribution' && !c.role) {
+                            if (c.agent) {
+                                def agent = loadIfLink(c.agent)
+                                if (primaryAutAgents.any { agentMatches(agent, it) }) {
+                                    c.role = ['@id': 'https://id.kb.se/relator/author']
+                                    it.changed = true
+                                    statistics.increment('link contribution', 'author role added to primary contribution')
+                                }
+                            }
+                        }
+                    }
+                }
+
+                docs.each {
+                    if (!dryRun && it.changed) {
+                        Document d = it.doc
+                        d.setGenerationDate(new Date())
+                        d.setGenerationProcess(generationProcess)
+                        whelk.storeAtomicUpdate(d, !loud, changedIn, changedBy, it.checksum)
+                    }
+                }
+            }
+        })
+    }
+
+    /**
+     * Tells whether a local agent description and a linked agent denote the same agent:
+     * the names must match and the life span years must not contradict each other.
+     */
+    static boolean agentMatches(Map local, Map linked) {
+        if (!nameMatch(local, linked)) {
+            return false
+        }
+        return !yearMismatch(local, linked)
+    }
+
+    /**
+     * Tells whether a local name matches the name of an agent or of any of its variants.
+     *
+     * An agent's name is "givenName familyName" if both are present, otherwise 'name';
+     * names are compared after normalize().
+     *
+     * @param local a Map describing an agent, or anything else that normalize() accepts as a name
+     * @param agent the agent to match against, including its 'hasVariant' entries
+     * @return true if the names match; false if they don't, or if either argument is null
+     *         (e.g. a linked agent that could not be loaded)
+     */
+    static boolean nameMatch(Object local, Map agent) {
+        // Callers pass the result of loading a link, which is null when the document is missing
+        if (local == null || agent == null) {
+            return false
+        }
+
+        def variants = [agent] + asList(agent.hasVariant)
+        def name = {
+            Map p ->
+                (p.givenName && p.familyName)
+                        ? normalize("${p.givenName} ${p.familyName}")
+                        : p.name ? normalize("${p.name}") : null
+        }
+
+        def localName = local instanceof Map ? name(local) : normalize(local)
+
+        localName && variants.any {
+            def variantName = name(it)
+            variantName && localName == variantName
+        }
+    }
+
+    /**
+     * Tells whether the life spans of two agents contradict each other.
+     *
+     * 'lifeSpan' is stripped of everything but digits and hyphens and split on hyphen; the first
+     * part is the birth year and the second the death year. A year only counts as a mismatch if
+     * it is present on both sides and differs.
+     */
+    static boolean yearMismatch(Map local, Map linked) {
+        def yearAt = { Map person, int position ->
+            person.lifeSpan?.with { (it.replaceAll(/[^\-0-9]/, '').split('-') as List)[position] }
+        }
+        def contradicts = { int position ->
+            def ours = yearAt(local, position)
+            def theirs = yearAt(linked, position)
+            ours && theirs && ours != theirs
+        }
+
+        def birthDiffers = contradicts(0)
+        def deathDiffers = contradicts(1)
+        birthDiffers || deathDiffers
+    }
+
+    /**
+     * Runs a job for every line in 'clusters' using a fixed thread pool with four threads per
+     * available processor, and waits (up to one day) for all jobs to finish.
+     *
+     * Each line is split on tabs/spaces into a cluster of ids. Progress is written to stderr for
+     * every 100th finished job. A NoWorkException from a job is ignored, any other exception
+     * is printed and the job is abandoned.
+     *
+     * @param f creates the job to run for a cluster
+     */
+    private void run(Function<List<String>, Runnable> f) {
+        int threads = Runtime.getRuntime().availableProcessors() * 4
+        ExecutorService pool = Executors.newFixedThreadPool(threads)
+        AtomicInteger finished = new AtomicInteger()
+
+        clusters.eachLine() { String line ->
+            List<String> cluster = Arrays.asList(line.split(/[\t ]+/))
+
+            pool.submit({
+                try {
+                    f.apply(cluster).run()
+                    int count = finished.incrementAndGet()
+                    if (count % 100 == 0) {
+                        System.err.println("$count")
+                    }
+                }
+                catch (NoWorkException ignored) {
+                    // Deliberately silent
+                }
+                catch (Exception e) {
+                    e.printStackTrace()
+                }
+            })
+        }
+
+        pool.shutdown()
+        pool.awaitTermination(1, TimeUnit.DAYS)
+    }
+
+    /**
+     * Bulk loads the documents with the given ids and wraps each of them in a Doc.
+     */
+    private Collection<Doc> loadDocs(Collection<String> cluster) {
+        def documents = whelk.bulkLoad(cluster).values()
+        return documents.collect { document -> new Doc(whelk, document) }
+    }
+
+    /**
+     * Loads a cluster, keeps the docs accepted by 'qualityMonographs' and partitions them by title.
+     *
+     * Comparison props are added to every kept doc. Partitions with a single member and
+     * partitions where any doc has a generic title are left out. The result is sorted on the
+     * display title of the first doc in each partition.
+     */
+    private Collection<Collection<Doc>> titleClusters(Collection<String> cluster) {
+        def monographs = loadDocs(cluster).findAll(qualityMonographs)
+        monographs.each { it.addComparisonProps() }
+
+        def byTitle = partitionByTitle(monographs)
+        def candidates = byTitle
+                .findAll { it.size() > 1 }
+                .findAll { !it.any { doc -> doc.hasGenericTitle() } }
+
+        return candidates.sort { a, b -> a.first().mainEntityDisplayTitle() <=> b.first().mainEntityDisplayTitle() }
+    }
+
+    /**
+     * Partitions docs using the relation "have at least one title variant in common".
+     */
+    Collection<Collection<Doc>> partitionByTitle(Collection<Doc> docs) {
+        def shareTitle = { Doc a, Doc b ->
+            def common = a.getTitleVariants().intersect(b.getTitleVariants())
+            !common.isEmpty()
+        }
+        return partition(docs, shareTitle)
+    }
+
+}
+
+/**
+ * Unchecked exception that the cluster job runner (run()) catches and ignores silently.
+ */
+class NoWorkException extends RuntimeException {
+    NoWorkException(String msg) {
+        super(msg)
+    }
+}
+
+
+
+
+
+
+
+
+
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Classification.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Classification.groovy
new file mode 100644
index 0000000000..2dd9a10e7e
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Classification.groovy
@@ -0,0 +1,63 @@
+package whelk.WorkMerging.compare
+
+/**
+ * Merges classifications as a set, and additionally collapses compatible elements:
+ *
+ * - Two SAB (kssb) classifications where one code is a prefix of the other are replaced by one
+ *   classification with the longer code and the highest scheme version of the two.
+ * - Two Dewey classifications with the same code are combined into one with the highest edition.
+ *
+ * Classifications without code are never collapsed.
+ */
+class Classification extends StuffSet {
+    @Override
+    Object merge(Object a, Object b) {
+        return mergeCompatibleElements(super.merge(a, b)) { c1, c2 ->
+            String code1 = c1['code']
+            String code2 = c2['code']
+            if (!code1 || !code2) {
+                return
+            }
+            code1 = code1.trim()
+            code2 = code2.trim()
+
+            if (isSab(c1) && isSab(c2) && (code1.startsWith(code2) || code2.startsWith(code1))) {
+                def result = [
+                        '@type' : 'Classification',
+                        'code'  : code1.size() > code2.size() ? code1 : code2,
+                        inScheme: [
+                                '@type'  : 'ConceptScheme',
+                                'code'   : 'kssb'
+                        ]
+                ]
+                def version = maxSabVersion(c1, c2)
+                if (version) {
+                    result['inScheme']['version'] = version
+                }
+                return result
+            }
+            else if (isDewey(c1) && isDewey(c2) && code1 == code2) {
+                Map result = [:]
+                result.putAll(c1)
+                result.putAll(c2)
+                // Don't store a null edition when neither side has one
+                def edition = maxDeweyEdition(c1, c2)
+                if (edition) {
+                    result['editionEnumeration'] = edition
+                }
+                return result
+            }
+        }
+    }
+
+    /** True if the classification is in the scheme with code 'kssb'. */
+    boolean isSab(Map c) {
+        c['inScheme'] && c['inScheme']['code'] == 'kssb'
+    }
+
+    /**
+     * Returns the numerically highest scheme version of two SAB classifications.
+     * If only one has a version, that one is returned; if neither has, null is returned
+     * (not a placeholder value that would end up in the merged data).
+     */
+    String maxSabVersion(c1, c2) {
+        def v1 = c1['inScheme']['version']
+        def v2 = c2['inScheme']['version']
+        if (!v1 || !v2) {
+            return v1 ?: v2
+        }
+        Integer.parseInt(v1) > Integer.parseInt(v2) ? v1 : v2
+    }
+
+    /** True if the classification has type 'ClassificationDdc'. */
+    boolean isDewey(Map c) {
+        c['@type'] == 'ClassificationDdc'
+    }
+
+    /** Returns the 'editionEnumeration' with the highest edition number; the second one on a tie. */
+    String maxDeweyEdition(c1, c2) {
+        def v1 = c1['editionEnumeration']
+        def v2 = c2['editionEnumeration']
+        deweyEdition(v1) > deweyEdition(v2) ? v1 : v2
+    }
+
+    /**
+     * Extracts the edition number from an edition string by dropping all non-digits.
+     * A missing edition, or one without any digits, counts as 0.
+     */
+    int deweyEdition(String edition) {
+        String digits = (edition ?: "").replaceAll("[^0-9]", "")
+        digits ? Integer.parseInt(digits) : 0
+    }
+}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Default.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Default.groovy
new file mode 100644
index 0000000000..07e0635234
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Default.groovy
@@ -0,0 +1,13 @@
+package whelk.WorkMerging.compare
+
+/**
+ * Handler that never considers two values compatible. Merging keeps the first value.
+ */
+class Default implements FieldHandler {
+    /** Always false. */
+    @Override
+    boolean isCompatible(Object a, Object b) {
+        false
+    }
+
+    /** Returns the first value unchanged. */
+    @Override
+    Object merge(Object a, Object b) {
+        a
+    }
+}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Extent.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Extent.groovy
new file mode 100644
index 0000000000..078a3fee78
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Extent.groovy
@@ -0,0 +1,15 @@
+package whelk.WorkMerging.compare;
+
+/**
+ * Handler for extent. Currently every pair of values is considered compatible.
+ */
+class Extent implements FieldHandler {
+
+    // TODO: allow one side missing extent (-1)?
+    // Candidate rule, not in use: a * 0.7 < b && a * 1.3 > b
+    @Override
+    boolean isCompatible(Object a, Object b) {
+        true
+    }
+
+    /** Returns the second value. */
+    @Override
+    Object merge(Object a, Object b) {
+        // not part of final work
+        b
+    }
+}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/FieldHandler.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/FieldHandler.groovy
new file mode 100644
index 0000000000..22a95fd2a9
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/FieldHandler.groovy
@@ -0,0 +1,12 @@
+package whelk.WorkMerging.compare
+
+import whelk.WorkMerging.Doc
+
+/**
+ * Decides how the values of a field are compared and merged.
+ */
+interface FieldHandler {
+    // Tells whether two values of the field are compatible
+    boolean isCompatible(Object a, Object b)
+    // Returns the merged value for two values of the field
+    Object merge(Object a, Object b)
+}
+
+/**
+ * A FieldHandler that can also pick a value given a whole collection of docs.
+ */
+interface ValuePicker extends FieldHandler {
+    // Returns the value picked from the given docs
+    Object pick(Collection<Doc> values)
+}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/GenreForm.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/GenreForm.groovy
new file mode 100644
index 0000000000..690e9353ff
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/GenreForm.groovy
@@ -0,0 +1,36 @@
+package whelk.WorkMerging.compare
+
+import whelk.WorkMerging.DocumentComparator
+
+//FIXME
+/**
+ * Merges genre/form values as a set, keeping only linked terms (those with an '@id'), and
+ * collapses pairs of terms listed in 'norm'.
+ */
+class GenreForm extends StuffSet {
+    private static final DocumentComparator c = new DocumentComparator()
+
+    // Terms that will be merged: when a key and one of its values (directly, or through
+    // another key) are both present, only the value is kept
+    private static def norm = [
+            (['@id': 'https://id.kb.se/marc/NotFictionNotFurtherSpecified']): [
+                    ['@id': 'https://id.kb.se/marc/FictionNotFurtherSpecified'],
+                    ['@id': 'https://id.kb.se/marc/Autobiography'],
+                    ['@id': 'https://id.kb.se/marc/Biography']
+            ],
+            (['@id': 'https://id.kb.se/marc/FictionNotFurtherSpecified'])   : [
+                    ['@id': 'https://id.kb.se/marc/Poetry'],
+                    ['@id': 'https://id.kb.se/marc/Novel']
+            ],
+    ]
+
+    @Override
+    Object merge(Object a, Object b) {
+        def linkedTerms = super.merge(a, b).findAll { it.'@id' }
+        return mergeCompatibleElements(linkedTerms) { first, second ->
+            if (n(first, second)) {
+                return second
+            }
+            if (n(second, first)) {
+                return first
+            }
+            return null
+        }
+    }
+
+    /** True if b is listed in 'norm' under a, directly or through the entries of a's values. */
+    boolean n(a, b) {
+        def replacements = norm[a]
+        if (replacements == null) {
+            return false
+        }
+        return replacements.any { it == b || n(it, b) }
+    }
+}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/SameOrEmpty.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/SameOrEmpty.groovy
new file mode 100644
index 0000000000..f36f580773
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/SameOrEmpty.groovy
@@ -0,0 +1,21 @@
+package whelk.WorkMerging.compare
+
+import static whelk.WorkMerging.Util.asList
+
+/**
+ * Handler that considers two values compatible when one of them is empty and the other is
+ * exactly a link to the IRI given at construction.
+ */
+class SameOrEmpty implements FieldHandler {
+    Object link
+
+    SameOrEmpty(String iri) {
+        def target = ['@id': iri]
+        this.link = [target]
+    }
+
+    @Override
+    boolean isCompatible(Object a, Object b) {
+        if (!a && asList(b) == link) {
+            return true
+        }
+        return !b && asList(a) == link
+    }
+
+    /** Returns the first value if it is non-empty, otherwise the second. */
+    @Override
+    Object merge(Object a, Object b) {
+        return a ? a : b
+    }
+}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/StuffSet.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/StuffSet.groovy
new file mode 100644
index 0000000000..92262086f7
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/StuffSet.groovy
@@ -0,0 +1,38 @@
+package whelk.WorkMerging.compare
+
+
+import java.util.function.BiFunction
+
+import static whelk.WorkMerging.Util.asList
+
+/**
+ * Handler treating values as sets: any two values are compatible and merging yields their union.
+ */
+class StuffSet implements FieldHandler {
+    @Override
+    boolean isCompatible(Object a, Object b) {
+        true
+    }
+
+    /** Returns the union of the two values (each treated as a list) as a list without duplicates. */
+    @Override
+    Object merge(Object a, Object b) {
+        return ((asList(a) as Set) + (asList(b) as Set)).collect()
+    }
+
+    /**
+     * Collapses elements that can be merged with each other.
+     *
+     * Each element is offered, together with every element already accepted, to the merge
+     * function. If the function returns a truthy value that value replaces the accepted element;
+     * otherwise the element is accepted as it is. The procedure is repeated until a pass makes
+     * no merge.
+     *
+     * @param o the elements (a single value is treated as a list of one)
+     * @param s returns the merged element for two elements, or a falsy value if they cannot be merged
+     * @return the list of remaining elements
+     */
+    static Object mergeCompatibleElements(Object o, BiFunction<Object, Object, Object> s) {
+        boolean changed = false
+        List result = []
+        asList(o).each {
+            // Track absorption explicitly. Deciding on the last callback result would drop the
+            // element whenever that result is falsy but not null (e.g. false or an empty map).
+            boolean absorbed = false
+            for (int i = 0 ; i < result.size() ; i++) {
+                def merged = s.apply(result[i], it)
+                if (merged) {
+                    result[i] = merged
+                    absorbed = true
+                    changed = true
+                    break
+                }
+            }
+            if (!absorbed) {
+                result << it
+            }
+        }
+        return changed ? mergeCompatibleElements(result, s) : result
+    }
+}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Subject.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Subject.groovy
new file mode 100644
index 0000000000..e69fb633e7
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Subject.groovy
@@ -0,0 +1,8 @@
+package whelk.WorkMerging.compare
+
+/**
+ * Merges subjects as a set, keeping only linked subjects (with an '@id') and complex subjects.
+ */
+class Subject extends StuffSet {
+    @Override
+    Object merge(Object a, Object b) {
+        def union = super.merge(a, b)
+        return union.findAll { subject ->
+            subject.'@id' || subject.'@type' == 'ComplexSubject'
+        }
+    }
+}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/TranslationOf.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/TranslationOf.groovy
new file mode 100644
index 0000000000..73836e6fee
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/TranslationOf.groovy
@@ -0,0 +1,22 @@
+package whelk.WorkMerging.compare
+
+import whelk.WorkMerging.DocumentComparator
+
+/**
+ * Handler for translationOf. Two values are compatible if both are empty, or if both are
+ * maps that are equal (according to DocumentComparator) when '@type' is disregarded.
+ */
+class TranslationOf implements FieldHandler {
+    DocumentComparator c = new DocumentComparator()
+
+    @Override
+    boolean isCompatible(Object a, Object b) {
+        if (!a && !b) {
+            return true
+        }
+        if (!a || !b) {
+            return false
+        }
+        if (!(a instanceof Map) || !(b instanceof Map)) {
+            return false
+        }
+        // @type is sometimes Work, sometimes Text. Should not matter for comparison
+        return c.isEqual(noType(a), noType(b))
+    }
+
+    /** Returns the first value. */
+    @Override
+    Object merge(Object a, Object b) {
+        // TODO: prefer one @type over another?
+        return a
+    }
+
+    /** Returns a copy of the map without the '@type' entry. */
+    Map noType(Map m) {
+        m.findAll { entry -> entry.key != '@type' }
+    }
+}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/WorkTitle.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/WorkTitle.groovy
new file mode 100644
index 0000000000..4c948af25a
--- /dev/null
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/WorkTitle.groovy
@@ -0,0 +1,23 @@
+package whelk.WorkMerging.compare
+
+import whelk.WorkMerging.Doc
+import whelk.WorkMerging.Util
+import org.apache.commons.lang3.NotImplementedException
+
+class WorkTitle implements ValuePicker {
+
+    @Override
+    boolean isCompatible(Object a, Object b) {
+        return !a || !b || !Util.getTitleVariants(a).intersect(Util.getTitleVariants(b)).isEmpty()
+    }
+
+    @Override
+    Object merge(Object a, Object b) {
+        throw new NotImplementedException('')
+    }
+    
+    @Override
+    Object pick(Collection<Doc> values) {
+        return Util.bestTitle(values)
+    }
+}

From 68bd85011ce2f6e4519eba44dc938504c224d040 Mon Sep 17 00:00:00 2001
From: Jannis Mohlin Tsiroyannis <jannis.mohlintsiroyannis@kb.se>
Date: Wed, 9 Nov 2022 10:10:20 +0100
Subject: [PATCH 9/9] Revert "Move already existing work merging code in from
 an experimental branch"

This reverts commit 8758d0fad8843d28ae70ff7676a5a0cc6d47f91a.
---
 .../whelk/importer/DatasetImporter.groovy     |   3 +-
 .../whelk/{WorkMerging => }/WorkMerging.java  |   6 +-
 .../whelk/WorkMerging/DisjointSets.java       | 220 -----
 .../main/groovy/whelk/WorkMerging/Doc.groovy  | 363 --------
 .../whelk/WorkMerging/DocumentComparator.java | 231 -----
 .../whelk/WorkMerging/FieldStatus.groovy      |   7 -
 .../main/groovy/whelk/WorkMerging/Html.groovy | 111 ---
 .../main/groovy/whelk/WorkMerging/Util.groovy | 306 -------
 .../whelk/WorkMerging/WorkComparator.groovy   | 136 ---
 .../whelk/WorkMerging/WorkToolJob.groovy      | 797 ------------------
 .../WorkMerging/compare/Classification.groovy |  63 --
 .../whelk/WorkMerging/compare/Default.groovy  |  13 -
 .../whelk/WorkMerging/compare/Extent.groovy   |  15 -
 .../WorkMerging/compare/FieldHandler.groovy   |  12 -
 .../WorkMerging/compare/GenreForm.groovy      |  36 -
 .../WorkMerging/compare/SameOrEmpty.groovy    |  21 -
 .../whelk/WorkMerging/compare/StuffSet.groovy |  38 -
 .../whelk/WorkMerging/compare/Subject.groovy  |   8 -
 .../WorkMerging/compare/TranslationOf.groovy  |  22 -
 .../WorkMerging/compare/WorkTitle.groovy      |  23 -
 20 files changed, 3 insertions(+), 2428 deletions(-)
 rename whelk-core/src/main/groovy/whelk/{WorkMerging => }/WorkMerging.java (98%)
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/DisjointSets.java
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/Doc.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/DocumentComparator.java
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/FieldStatus.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/Html.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/Util.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/WorkComparator.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/WorkToolJob.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/Classification.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/Default.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/Extent.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/FieldHandler.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/GenreForm.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/SameOrEmpty.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/StuffSet.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/Subject.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/TranslationOf.groovy
 delete mode 100644 whelk-core/src/main/groovy/whelk/WorkMerging/compare/WorkTitle.groovy

diff --git a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
index 394037cf73..3fddb229c8 100644
--- a/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
+++ b/importers/src/main/groovy/whelk/importer/DatasetImporter.groovy
@@ -2,7 +2,7 @@ package whelk.importer
 
 import groovy.util.logging.Log4j2 as Log
 import groovy.transform.CompileStatic
-import whelk.WorkMerging.WorkMerging
+import whelk.WorkMerging
 
 import static groovy.transform.TypeCheckingMode.SKIP
 
@@ -11,6 +11,7 @@ import whelk.JsonLd
 import whelk.TargetVocabMapper
 import whelk.Whelk
 import whelk.converter.TrigToJsonLdParser
+import whelk.exception.CancelUpdateException
 import whelk.util.DocumentUtil
 import static whelk.util.LegacyIntegrationTools.NO_MARC_COLLECTION
 
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/WorkMerging.java b/whelk-core/src/main/groovy/whelk/WorkMerging.java
similarity index 98%
rename from whelk-core/src/main/groovy/whelk/WorkMerging/WorkMerging.java
rename to whelk-core/src/main/groovy/whelk/WorkMerging.java
index 99a2105145..6d9fb2e6d0 100644
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/WorkMerging.java
+++ b/whelk-core/src/main/groovy/whelk/WorkMerging.java
@@ -1,8 +1,4 @@
-package whelk.WorkMerging;
-
-import whelk.Document;
-import whelk.IdGenerator;
-import whelk.Whelk;
+package whelk;
 
 import java.util.*;
 
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/DisjointSets.java b/whelk-core/src/main/groovy/whelk/WorkMerging/DisjointSets.java
deleted file mode 100644
index 7cd3e33a6c..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/DisjointSets.java
+++ /dev/null
@@ -1,220 +0,0 @@
-package whelk.WorkMerging;
-
-import java.util.List;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-
-/**
- * This class keeps track of a set of disjoint (non-overlapping) sets.
- */
-public class DisjointSets<T> {
-    /**
-     * Sets as forest of rooted trees.
-     * Pointer to parent in tree, root points to itself.
-     */
-    List<Integer> forest;
-
-    /**
-     * Rank of each tree node (keeps trees balanced when merging).
-     */
-    List<Integer> ranks;
-
-    /**
-     * Sets as circular linked lists (so that we can find all elements in a set).
-     * Pointer to the next element in the set.
-     */
-    List<Integer> sets;
-
-    /**
-     * Map from set element value to index
-     */
-    Map<T, Integer> ixs;
-
-    /**
-     * Map from set element index to value
-     */
-    List<T> ixToValue;
-
-    public DisjointSets(int initialCapacity) {
-        forest = new ArrayList<>(initialCapacity);
-        ranks = new ArrayList<>(initialCapacity);
-        sets = new ArrayList<>(initialCapacity);
-        ixs = new HashMap<>(initialCapacity);
-        ixToValue = new ArrayList<>(initialCapacity);
-    }
-
-    public DisjointSets() {
-        this(20);
-    }
-
-    /**
-     * Create a new set if it doesn't already exist.
-     *
-     * @param e initial element in set
-     */
-    public void createSet(T e) {
-        if (ixs.containsKey(e)) {
-            return;
-        }
-
-        int ix = forest.size();
-        ixs.put(e, ix);
-        forest.add(ix);
-        ranks.add(0);
-        sets.add(ix);
-        ixToValue.add(e);
-
-        if (ix == Integer.MAX_VALUE) {
-            throw new IllegalStateException("size > Integer.MAX_VALUE");
-        }
-    }
-
-    /**
-     * Add a set, merging it with existing intersecting sets
-     *
-     * @param set a set to be added
-     */
-    public void addSet(Iterable<T> set) {
-        Iterator<T> i = set.iterator();
-        if (!i.hasNext()) {
-            return;
-        }
-
-        T first = i.next();
-        while (i.hasNext()) {
-            mergeSets(first, i.next());
-        }
-    }
-
-    /**
-     * Merge two sets identified by elements.
-     * Sets will be created if they don't exist
-     *
-     * @param a an element of the first set
-     * @param b an element of the second set
-     */
-    public void mergeSets(T a, T b) {
-        if (!ixs.containsKey(a)) {
-            createSet(a);
-        }
-        if (!ixs.containsKey(b)) {
-            createSet(b);
-        }
-
-        int ixA = ixs.get(a);
-        int ixB = ixs.get(b);
-
-        int rootA = root(ixA);
-        int rootB = root(ixB);
-
-        if (rootA == rootB) {
-            return;
-        }
-
-        int rankA = ranks.get(rootA);
-        int rankB = ranks.get(rootB);
-
-        if (rankA > rankB) {
-            forest.set(rootB, rootA);
-        } else {
-            forest.set(rootA, rootB);
-            if (rankA == rankB) {
-                ranks.set(rootB, rankB + 1);
-            }
-        }
-
-        int link = sets.get(rootA);
-        sets.set(rootA, sets.get(rootB));
-        sets.set(rootB, link);
-    }
-
-    /**
-     * Lookup a set based on an element in the set
-     *
-     * @param e an element in the set
-     * @return the set
-     */
-    public Set<T> getSet(T e) {
-        if (!ixs.containsKey(e)) {
-            throw new IllegalArgumentException("No set with element: " + e);
-        }
-
-        Set<T> result = new HashSet<>();
-        int start = sets.get(ixs.get(e));
-        int node = start;
-        do {
-            result.add(ixToValue.get(node));
-            node = sets.get(node);
-        } while (node != start);
-
-        return result;
-    }
-
-    /**
-     * Iterate over all sets
-     *
-     * @param visitor
-     */
-    public void iterateAllSets(SetVisitor<T> visitor) {
-        boolean[] visited = new boolean[sets.size()];
-
-        for (int ix : sets) {
-            if (visited[ix]) {
-                continue;
-            }
-
-            int start = sets.get(ix);
-            int node = start;
-            do {
-                visited[node] = true;
-                visitor.nextElement(ixToValue.get(node));
-                node = sets.get(node);
-            } while (node != start);
-
-            visitor.closeSet();
-        }
-    }
-
-    /**
-     * @return a set with all sets
-     */
-    public Set<Set<T>> allSets() {
-        final Set<Set<T>> result = new HashSet<>();
-
-        iterateAllSets(new SetVisitor<T>() {
-            Set<T> current = new HashSet<>();
-
-            public void closeSet() {
-                result.add(current);
-                current = new HashSet<>();
-            }
-
-            public void nextElement(T e) {
-                current.add(e);
-            }
-        });
-
-        return result;
-    }
-
-    private int root(int node) {
-        while (node != forest.get(node)) {
-            int parent = forest.get(node);
-            //path splitting - point node to grandparent
-            forest.set(node, forest.get(parent));
-            node = parent;
-        }
-
-        return node;
-    }
-
-    public interface SetVisitor<T> {
-        void nextElement(T e);
-
-        void closeSet();
-    }
-}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/Doc.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/Doc.groovy
deleted file mode 100644
index c50a7abf85..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/Doc.groovy
+++ /dev/null
@@ -1,363 +0,0 @@
-package whelk.WorkMerging
-
-import se.kb.libris.Normalizers
-import whelk.Document
-import whelk.JsonLd
-import whelk.Whelk
-
-import static whelk.WorkMerging.Util.asList
-
-class Doc {
-    public static final String SAOGF_SKÖN = 'https://id.kb.se/term/saogf/Sk%C3%B6nlitteratur'
-    public static final List MARC_FICTION = [
-            'https://id.kb.se/marc/FictionNotFurtherSpecified',
-            'https://id.kb.se/marc/Drama',
-            'https://id.kb.se/marc/Essay',
-            'https://id.kb.se/marc/Novel',
-            'https://id.kb.se/marc/HumorSatiresEtc',
-            'https://id.kb.se/marc/Letter',
-            'https://id.kb.se/marc/ShortStory',
-            'https://id.kb.se/marc/MixedForms',
-            'https://id.kb.se/marc/Poetry',
-    ]
-    public static final List MARC_NOT_FICTION = [
-            'https://id.kb.se/marc/NotFictionNotFurtherSpecified',
-            'https://id.kb.se/marc/Biography'
-    ]
-    public static final List DRAMA_GF = [
-            'https://id.kb.se/term/saogf/Dramatik',
-            'https://id.kb.se/marc/Drama'
-    ]
-
-    Whelk whelk
-    Document doc
-    Map work
-    Map framed
-    List<String> titles
-
-    //FIXME
-    Document ogDoc
-
-    Doc(Whelk whelk, Document doc) {
-        this.whelk = whelk
-        this.doc = doc
-        this.ogDoc = doc.clone()
-    }
-
-    Map getWork() {
-        if (!work) {
-            work = getWork(whelk, doc)
-        }
-
-        return work
-    }
-
-    static Map getWork(Whelk whelk, Document d) {
-        Map work = Normalizers.getWork(whelk.jsonld, d)
-        if (!work) {
-            throw new NoWorkException(d.shortId)
-        }
-        work = new HashMap<>(work)
-
-        //TODO 'marc:fieldref'
-
-        work.remove('@id')
-        return work
-    }
-
-    Map workCopy() {
-        return getWork(whelk, doc.clone())
-    }
-
-    Map getMainEntity() {
-        return doc.data['@graph'][1]
-    }
-
-    boolean isInstance() {
-        return getMainEntity().containsKey('instanceOf')
-    }
-
-    List<String> getTitleVariants() {
-        if (!titles) {
-            titles = Util.getTitleVariants(getMainEntity()['hasTitle'])
-        }
-
-        return titles
-    }
-
-    boolean hasGenericTitle() {
-        Util.hasGenericTitle(getMainEntity()['hasTitle'])
-    }
-
-    private static String displayTitle(Map thing) {
-        thing['hasTitle'].collect { it['@type'] + ": " + it['flatTitle'] }.join(', ')
-    }
-
-    String mainEntityDisplayTitle() {
-        displayTitle(['hasTitle': Util.flatTitles(getMainEntity()['hasTitle'])])
-    }
-
-    String link() {
-        String base = Document.getBASE_URI().toString()
-        String kat = "katalogisering/"
-        String id = doc.shortId
-        return base + kat + id
-    }
-
-    boolean isMonograph() {
-        getMainEntity()['issuanceType'] == 'Monograph'
-    }
-
-    boolean hasPart() {
-        getWork()['hasPart'] != null
-    }
-
-    String encodingLevel() {
-        return doc.data['@graph'][0]['encodingLevel'] ?: ''
-    }
-
-    int numPages() {
-        String extent = Util.getPathSafe(getMainEntity(), ['extent', 0, 'label', 0]) ?: Util.getPathSafe(getMainEntity(), ['extent', 0, 'label'], '')
-        return numPages(extent)
-    }
-
-    // TODO: improve parsing https://metadatabyran.kb.se/beskrivning/materialtyper-arbetsfloden/tryckta-monografier/omfang-for-tryckta-monografier
-    static int numPages(String extentLabel) {
-        def l = extentLabel.replace('onumrerade', '')
-        def matcher = l =~ /(\d+)(?=[, \[\]0-9]*[sp])/
-        List<Integer> pages = []
-        while (matcher.find()) {
-            pages << Integer.parseInt(matcher.group(1))
-        }
-        pages ? pages.max() : -1
-    }
-
-    // TODO...
-    String getDisplayText(String field) {
-        if (field == 'contribution') {
-            return contributorStrings().join("<br>")
-        } else if (field == 'classification') {
-            return classificationStrings().join("<br>")
-        } else if (field == 'instance title') {
-            return isInstance() ? (getMainEntity()['hasTitle'] ?: '') : ''
-        } else if (field == 'work title') {
-            // To load hasTitle from linked work in instanceOf we can use getFramed()
-            // However we then need to handle that getFramed() loads linked instances in hasTitle.source
-            // Prefer getMainEntity() for now
-            return isInstance() ? (getMainEntity()['instanceOf']['hasTitle'] ?: '') : (getMainEntity()['hasTitle'] ?: '')
-        } else if (field == 'instance type') {
-            return isInstance() ? getMainEntity()['@type'] : ''
-        } else if (field == 'editionStatement') {
-            return getMainEntity()['editionStatement'] ?: ''
-        } else if (field == 'responsibilityStatement') {
-            return getMainEntity()['responsibilityStatement'] ?: ''
-        } else if (field == 'encodingLevel') {
-            return encodingLevel()
-        } else if (field == 'publication') {
-            return chipString(getMainEntity()['publication'] ?: [])
-        } else if (field == 'identifiedBy') {
-            return chipString(getMainEntity()['identifiedBy'] ?: [])
-        } else if (field == 'extent') {
-            return chipString(getMainEntity()['extent'] ?: [])
-        } else if (field == 'reproductionOf') {
-            return reproductionOfLink()
-        } else {
-            return chipString(getWork().getOrDefault(field, []))
-        }
-    }
-
-    protected String chipString(def thing) {
-        Util.chipString(thing, whelk)
-    }
-
-    String tooltip(String string, String tooltip) {
-        """<abbr title="${tooltip}">${string}</abbr>"""
-    }
-
-    private String reproductionOfLink() {
-        def shortId = Util.getPathSafe(getMainEntity(), ['reproductionOf', '@id'])
-                ?.tokenize("/#")
-                ?.dropRight(1)
-                ?.last() ?: ''
-
-        return "<a href=\"#$shortId\">$shortId</a>"
-    }
-
-    private List classificationStrings() {
-        List path = isInstance() ? ['instanceOf', 'classification'] : ['classification']
-        List<Map> classification = Util.getPathSafe(getFramed(), path, [])
-        classification.collect() { c ->
-            StringBuilder s = new StringBuilder()
-            s.append(flatMaybeLinked(c['inScheme'], ['code', 'version']).with { it.isEmpty() ? it : it + ': ' })
-            s.append(flatMaybeLinked(c, ['code']))
-            return s.toString()
-        }
-    }
-
-    private List contributorStrings() {
-        List path = isInstance() ? ['instanceOf', 'contribution'] : ['contribution']
-        List contribution = Util.getPathSafe(getFramed(), path, [])
-
-        return contribution.collect { Map c ->
-            contributionStr(c)
-        }
-    }
-
-    protected Map getFramed() {
-        if (!framed) {
-            if (isInstance()) {
-                framed = JsonLd.frame(doc.getThingIdentifiers().first(), whelk.loadEmbellished(doc.shortId).data)
-            } else {
-                Document copy = doc.clone()
-                whelk.embellish(copy)
-                framed = JsonLd.frame(doc.getThingIdentifiers().first(), copy.data)
-            }
-        }
-
-        return framed
-    }
-
-    private String contributionStr(Map contribution) {
-        StringBuilder s = new StringBuilder()
-
-        if (contribution['@type'] == 'PrimaryContribution') {
-            s.append('<b>')
-        }
-
-        s.append(flatMaybeLinked(contribution['role'], ['code', 'label']).with { it.isEmpty() ? it : it + ': ' })
-        s.append(flatMaybeLinked(contribution['agent'], ['givenName', 'familyName', 'lifeSpan', 'name']))
-
-        if (contribution['@type'] == 'PrimaryContribution') {
-            s.append('</b>')
-        }
-
-        return s.toString()
-    }
-
-    static String flatten(Object o, List order, String mapSeparator = ': ') {
-        if (o instanceof String) {
-            return o
-        }
-        if (o instanceof List) {
-            return o
-                    .collect { flatten(it, order) }
-                    .join(' || ')
-        }
-        if (o instanceof Map) {
-            return order
-                    .findResults { ((Map) o).get(it) }
-                    .collect { flatten(it, order) }
-                    .join(mapSeparator)
-        }
-
-        throw new RuntimeException(String.format("unexpected type: %s for %s", o.class.getName(), o))
-    }
-
-    private String flatMaybeLinked(Object thing, List order) {
-        if (!thing)
-            return ''
-
-        if (thing instanceof List) {
-            return thing.collect { flatMaybeLinked(it, order) }.join(' | ')
-        }
-        String s = flatten(thing, order, ', ')
-
-        thing['@id']
-                ? """<a href="${thing['@id']}">$s</a>"""
-                : s
-    }
-
-    boolean isFiction() {
-        isMarcFiction() || isSaogfFiction() || isSabFiction()
-    }
-
-    boolean isMarcFiction() {
-        (getWork()['genreForm'] ?: []).any { it['@id'] in MARC_FICTION }
-    }
-
-    boolean isMarcNotFiction() {
-        (getWork()['genreForm'] ?: []).any { it['@id'] in MARC_NOT_FICTION }
-    }
-
-    boolean isSaogfFiction() {
-        (getWork()['genreForm'] ?: []).any { whelk.relations.isImpliedBy(SAOGF_SKÖN, it['@id'] ?: '') }
-    }
-
-    boolean isSabFiction() {
-        classificationStrings().any { it.contains('kssb') && it.contains(': H') }
-    }
-
-    boolean isNotFiction() {
-        // A lot of fiction has marc/NotFictionNotFurtherSpecified but then classification is usually empty
-        isMarcNotFiction() && (!classificationStrings().isEmpty() && !isSabFiction())
-    }
-
-    boolean isText() {
-        getWork()['@type'] == 'Text'
-    }
-
-    boolean isTranslationWithoutTranslator() {
-        isTranslation() && !hasTranslator()
-    }
-
-    boolean isTranslation() {
-        getWork()['translationOf']
-    }
-
-    boolean isSabDrama() {
-        classificationStrings().any { it.contains(': Hc.02') || it.contains(': Hce.02') }
-    }
-
-    boolean isGfDrama() {
-        asList(getWork()['genreForm']).any { it['@id'] in DRAMA_GF }
-    }
-
-    boolean isDrama() {
-        isSabDrama() || isGfDrama()
-    }
-
-    boolean hasRole(String relatorIri) {
-        asList(getWork()['contribution']).any {
-            asList(it['role']).contains(['@id': relatorIri])
-        }
-    }
-
-    boolean hasTranslator() {
-        hasRole('https://id.kb.se/relator/translator')
-    }
-
-    boolean hasDistinguishingEdition() {
-        (getMainEntity()['editionStatement'] ?: '').toString().toLowerCase().contains("förk")
-    }
-
-    boolean hasRelationshipWithContribution() {
-        asList(getWork()['relationship']).any { r ->
-            asList(r['entity']).any { e ->
-                e.containsKey('contribution')
-            }
-        }
-    }
-
-    void addComparisonProps() {
-        if (hasDistinguishingEdition()) {
-            addToWork('editionStatement')
-        }
-        getWork()['_numPages'] = numPages()
-    }
-
-    void moveSummaryToInstance() {
-        if (getWork()['summary']) {
-            getMainEntity()['summary'] = asList(getMainEntity()['summary']) + asList(getWork()['summary'])
-            getWork().remove('summary')
-        }
-    }
-
-    void addToWork(String field) {
-        getWork()[field] = getMainEntity()[field]
-    }
-
-    void removeComparisonProps() {
-        getWork().remove('editionStatement')
-        getWork().remove('_numPages')
-    }
-}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/DocumentComparator.java b/whelk-core/src/main/groovy/whelk/WorkMerging/DocumentComparator.java
deleted file mode 100644
index 2adb902b0b..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/DocumentComparator.java
+++ /dev/null
@@ -1,231 +0,0 @@
-package whelk.WorkMerging;
-
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.Stack;
-import java.util.function.Function;
-
-public class DocumentComparator {
-    private static final Comparator<Object> BY_HASH = (o1, o2) -> o2.hashCode() - o1.hashCode();
-
-    private final Function<Object, Boolean> isOrderedList;
-
-    public DocumentComparator() {
-        this(o -> "termComponentList".equals(o));
-    }
-
-    public DocumentComparator(Function<Object, Boolean> isOrderedList) {
-        if (isOrderedList == null)
-            throw new NullPointerException();
-        this.isOrderedList = isOrderedList;
-    }
-
-    public boolean isEqual(Map<?, ?> a, Map<?, ?> b) {
-        if (a == null || b == null || a.size() != b.size()) {
-            return false;
-        }
-        for (Object key : a.keySet()) {
-            if (!isEqual(a.get(key), b.get(key), key)) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    private boolean isEqual(Object a, Object b, Object key) {
-        if (a == null || b == null) {
-            return false;
-        }
-        else if (a.getClass() != b.getClass()) {
-            return (isSingleItemList(a) && isEqual(((List<?>) a).get(0), b, key)
-                    || (isSingleItemList(b) && isEqual(a, ((List<?>) b).get(0), key)));
-        }
-        else if (a instanceof Map) {
-            return isEqual((Map<?, ?>) a, (Map<?, ?>) b);
-        }
-        else if (a instanceof List) {
-            if (isOrderedList.apply(key)) {
-                return isEqualOrdered((List<?>) a, (List<?>) b);
-            } else {
-                return isEqualUnordered((List<?>) a, (List<?>) b);
-            }
-        }
-        else {
-            return a.equals(b);
-        }
-    }
-
-    private boolean isSingleItemList(Object o) {
-        return o instanceof List && ((List<?>) o).size() == 1;
-    }
-
-    private boolean isEqualOrdered(List<?> a, List<?> b) {
-        if (a.size() != b.size()) {
-            return false;
-        }
-        for (int i = 0; i < a.size(); i++) {
-            if (!isEqual(a.get(i), b.get(i), null)) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    private boolean isEqualUnordered(List<?> a, List<?> b) {
-        if (a.size() != b.size()) {
-            return false;
-        }
-
-        a.sort(BY_HASH);
-        b.sort(BY_HASH);
-        
-        List<Integer> taken = new ArrayList<>(a.size());
-        nextA: for (int i = 0 ; i < a.size() ; i++) {
-            for (int j = 0 ; j < b.size() ; j++) {
-                if (!taken.contains(j) && isEqual(a.get(i), b.get(j), null)) {
-                    taken.add(j);
-                    continue nextA;
-                }
-            }
-            return false;
-        }
-
-        return true;
-    }
-
-    public boolean isSubset(Map<?, ?> a, Map<?, ?> b) {
-        if (a == null || b == null || a.size() > b.size()) {
-            return false;
-        }
-        for (Object key : a.keySet()) {
-            if (!isSubset(a.get(key), b.get(key), key)) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    private boolean isSubset(Object a, Object b, Object key) {
-        if (a == null || b == null || a.getClass() != b.getClass()) {
-            return false;
-        }
-        else if (a instanceof Map) {
-            return isSubset((Map<?, ?>) a, (Map<?, ?>) b);
-        }
-        else if (a instanceof List) {
-            if (isOrderedList.apply(key)) {
-                return isOrderedSubset((List<?>) a, (List<?>) b);
-            } else {
-                return isUnorderedSubset((List<?>) a, (List<?>) b);
-            }
-        }
-        else {
-            return a.equals(b);
-        }
-    }
-
-    private boolean isOrderedSubset(List<?> a, List<?> b) {
-        if (a.size() > b.size()) {
-            return false;
-        }
-        int ixB = 0;
-        for (int ixA = 0; ixA < a.size(); ixA++) {
-            if (ixB == b.size()) {
-                return false;
-            }
-
-            while (!isSubset(a.get(ixA), b.get(ixB++), null)) {
-                if (ixB == b.size()) {
-                    return false;
-                }
-            }
-        }
-        return true;
-    }
-
-    private boolean isUnorderedSubset(List<?> a, List<?> b) {
-        return new UnorderedListComparator(a, b).isSubset();
-    }
-
-    private class UnorderedListComparator {
-        List a;
-        List b;
-
-        Stack<Integer> stack;
-        Stack<Integer> matched;
-        boolean anyMatch;
-        Boolean[][] cache;
-
-        UnorderedListComparator(List<?> a, List<?> b) {
-            this.a = a;
-            this.b = b;
-            cache = new Boolean[a.size()][b.size()];
-        }
-
-        boolean isSubset() {
-            // since elements in 'a' might be subsets of more than one element
-            // in 'b' we must try different ways of matching elements
-            stack = new Stack<>();
-            matched = new Stack<>();
-
-            nextA();
-            while (stack.size() > 0) {
-                boolean match = isSubset(ixA(), ixB());
-                nextB();
-                if (match) {
-                    anyMatch = true;
-                    if (!matched.contains(ixB())) {
-                        matched.push(ixB());
-                        if (matched.size() == a.size()) {
-                            return true;
-                        }
-                        nextA();
-                    }
-                }
-
-                while (ixB() == b.size()) {
-                    if (!anyMatch) {
-                        return false;
-                    }
-                    previousA();
-                }
-            }
-
-            return false;
-        }
-
-        private boolean isSubset(int ixA, int ixB) {
-            if (cache[ixA][ixB] == null) {
-                cache[ixA][ixB] = DocumentComparator.this.isSubset(a.get(ixA), b.get(ixB), null);
-            }
-
-            return cache[ixA][ixB];
-        }
-
-        private void previousA() {
-            stack.pop();
-            if (matched.size() > 0) {
-                matched.pop();
-            }
-        }
-
-        private void nextA() {
-            stack.push(0);
-            anyMatch = false;
-        }
-
-        private void nextB() {
-            stack.push(stack.pop() + 1);
-        }
-
-        private int ixA() {
-            return stack.size() - 1;
-        }
-
-        private int ixB() {
-            return stack.size() > 0 ? stack.peek() : -1;
-        }
-    }
-}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/FieldStatus.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/FieldStatus.groovy
deleted file mode 100644
index a33445d1b4..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/FieldStatus.groovy
+++ /dev/null
@@ -1,7 +0,0 @@
-package whelk.WorkMerging
-
-enum FieldStatus {
-    EQUAL,
-    COMPATIBLE,
-    DIFF
-}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/Html.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/Html.groovy
deleted file mode 100644
index c313415618..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/Html.groovy
+++ /dev/null
@@ -1,111 +0,0 @@
-package whelk.WorkMerging
-
-import org.apache.commons.codec.digest.DigestUtils
-
-import static whelk.WorkMerging.FieldStatus.COMPATIBLE
-import static whelk.WorkMerging.FieldStatus.DIFF
-import static whelk.WorkMerging.FieldStatus.EQUAL
-
-class Html {
-    private static String CSS = Html.class.getClassLoader()
-            .getResourceAsStream('merge-works/table.css').getText("UTF-8")
-
-    static final String START = """<html><head>
-                    <meta charset="UTF-8">
-                    <style>$CSS</style>
-                    </head><body>"""
-    static final String END = '</body></html>'
-    static final String HORIZONTAL_RULE = "<hr/><br/>\n"
-
-    static def infoFields = ['reproductionOf', 'instance title', 'work title', 'instance type', 'editionStatement', 'responsibilityStatement', 'encodingLevel', 'publication', 'identifiedBy', 'extent']
-
-    static String clusterTable(Collection<Doc> cluster) {
-        String id = clusterId(cluster.collect { it.doc.shortId })
-        String header = """
-            <tr>
-                <th><a id="${id}"><a href="#${id}">${id}</th>
-                ${cluster.collect { doc -> "<th><a id=\"${doc.doc.shortId}\" href=\"${doc.link()}\">${doc.doc.shortId}</a></th>" }.join('\n')}
-            </tr>
-            <tr>
-                <td></td>
-                ${cluster.collect { doc -> "<td>${doc.mainEntityDisplayTitle()}</td>" }.join('\n')}                                     
-            </tr>
-           """.stripIndent()
-
-        def statuses = WorkComparator.compare(cluster)
-
-        String info = infoFields.collect(fieldRows(cluster, "info")).join('\n')
-        String equal = statuses.get(EQUAL, []).collect(fieldRows(cluster, cluster.size() > 1 ? EQUAL.toString() : "")).join('\n')
-        String compatible = statuses.get(COMPATIBLE, []).collect(fieldRows(cluster, COMPATIBLE.toString())).join('\n')
-        String diff = statuses.get(DIFF, []).collect(fieldRows(cluster, DIFF.toString())).join('\n')
-
-        return """
-            <table>
-                ${header}
-                ${equal}
-                ${compatible}
-                ${diff}
-                ${info}
-            </table>
-            <br/><br/>
-        """
-    }
-
-    static String hubTable(List<Collection<Doc>> docs) {
-        def mergedWorks = docs*.first()
-        def ids = docs.collect { group ->
-            group.drop(1).collectEntries { doc ->
-                [doc.doc.shortId, doc.link()]
-            }
-        }
-        def clusterId = clusterId(ids*.keySet().flatten())
-
-        String header = """
-            <tr>
-                <th><a id="${clusterId}"><a href="#${clusterId}">${clusterId}</th>
-                ${mergedWorks.collect { "<th></th>" }.join('\n')}
-            </tr>
-           """.stripIndent()
-
-        String derivedFrom =
-                """
-                    <tr class="info">
-                        <td>_derivedFrom</td>
-                        ${ids.collect { "<td>${it.collect { id, link -> "<a id=\"$id\" href=\"$link\">$id</a>" }.join('\n')}</td>" }.join('\n')}
-                        </tr> 
-                """.stripIndent()
-
-        def statuses = WorkComparator.compare(mergedWorks)
-
-        String equal = statuses.get(EQUAL, []).collect(fieldRows(mergedWorks, mergedWorks.size() > 1 ? EQUAL.toString() : "")).join('\n')
-        String compatible = statuses.get(COMPATIBLE, []).collect(fieldRows(mergedWorks, COMPATIBLE.toString())).join('\n')
-        String diff = statuses.get(DIFF, []).collect(fieldRows(mergedWorks, DIFF.toString())).join('\n')
-
-        return """
-            <table>
-                ${header}
-                ${equal}
-                ${compatible}
-                ${diff}
-                ${derivedFrom}
-            </table>
-            <br/><br/>
-        """
-    }
-
-    static String clusterId(Collection<String> cluster) {
-        cluster
-                ? DigestUtils.md5Hex(cluster.sort().first()).toUpperCase().substring(0, 12)
-                : ""
-    }
-
-    private static def fieldRows(Collection<Doc> cluster, String cls) {
-        { field ->
-            """
-            <tr class="${cls}">
-                <td>${field}</td>
-                ${cluster.collect { "<td>${it.getDisplayText(field)}</td>" }.join('\n')}   
-            </tr> """.stripIndent()
-        }
-    }
-}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/Util.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/Util.groovy
deleted file mode 100644
index 07a876cff3..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/Util.groovy
+++ /dev/null
@@ -1,306 +0,0 @@
-package whelk.WorkMerging
-
-import org.apache.commons.lang3.StringUtils
-import whelk.Whelk
-import whelk.util.Unicode
-
-import java.util.regex.Pattern
-
-class Util {
-    static def titleComponents = ['mainTitle', 'titleRemainder', 'subtitle', 'hasPart', 'partNumber', 'partName', 'marc:parallelTitle', 'marc:equalTitle']
-
-    static def titleVariant = ['Title', 'ParallelTitle']
-    // removed 'VariantTitle', 'CoverTitle' since they sometimes contain random generic stuff like "Alibis filmroman", "Kompisböcker för de yngsta"
-
-    static enum Relator {
-        TRANSLATOR('https://id.kb.se/relator/translator'),
-        AUTHOR('https://id.kb.se/relator/author'),
-        ILLUSTRATOR('https://id.kb.se/relator/illustrator'),
-        AUTHOR_OF_INTRO('https://id.kb.se/relator/authorOfIntroduction'),
-        ADAPTER('https://id.kb.se/relator/adapter'),
-        COVER_DESIGNER('https://id.kb.se/relator/coverDesigner'),
-        COMPILER('https://id.kb.se/relator/compiler'),
-        AUTHOR_OF_AFTERWORD('https://id.kb.se/relator/authorOfAfterwordColophonEtc'),
-        PHOTOGRAPHER('https://id.kb.se/relator/photographer'),
-        EDITOR('https://id.kb.se/relator/editor'),
-        UNSPECIFIED_CONTRIBUTOR('https://id.kb.se/relator/unspecifiedContributor'),
-        PRIMARY_RIGHTS_HOLDER('https://id.kb.se/relator/primaryRightsHolder')
-
-        String iri
-
-        private Relator(String iri) {
-            this.iri = iri
-        }
-    }
-
-//    private static Set<String> IGNORED_SUBTITLES = WorkToolJob.class.getClassLoader()
-//            .getResourceAsStream('merge-works/ignored-subtitles.txt')
-//            .readLines().grep().collect(Util.&normalize) as Set
-
-    private static Set<String> GENERIC_TITLES = WorkToolJob.class.getClassLoader()
-            .getResourceAsStream('merge-works/generic-titles.txt')
-            .readLines().grep().collect(Util.&normalize) as Set
-
-    static def noise =
-            [",", '"', "'", '[', ']', ',', '.', '.', ':', ';', '-', '(', ')', ' the ', '-', '–', '+', '!', '?'].collectEntries { [it, ' '] }
-
-
-    static List asList(Object o) {
-        (o ?: []).with { it instanceof List ? it : [it] }
-    }
-
-    /**
-     * Partition a collection based on equality condition
-     *
-     * NOTE: O(n^2)...
-     */
-    static <T> Collection<Collection<T>> partition(Collection<T> collection, Closure matcher) {
-        List<List<T>> result = []
-
-        for (T t : collection) {
-            boolean match = false
-            for (List<T> group : result) {
-                if (groupMatches(t, group, matcher)) {
-                    group.add(t)
-                    match = true
-                    break
-                }
-            }
-
-            if (!match) {
-                result.add([t])
-            }
-        }
-        return result
-    }
-
-    static <T> boolean groupMatches(T t, List<T> group, Closure matcher) {
-        for (T other : group) {
-            if (matcher(other, t)) {
-                return true
-            }
-        }
-        return false
-    }
-
-    static boolean hasGenericTitle(List hasTitle) {
-        hasTitle.any { it['mainTitle'] && normalize((String) it['mainTitle']) in GENERIC_TITLES }
-    }
-
-    static List dropSubTitles(List hasTitle) {
-        hasTitle.collect { t ->
-            def copy = new TreeMap(t)
-            copy.subMap(copy.keySet() - ['subtitle', 'titleRemainder'])
-        }
-    }
-
-//    static List dropGenericSubTitles(List hasTitle) {
-//        hasTitle.collect {
-//            def copy = new TreeMap(it)
-//            if (copy['subtitle'] || copy['titleRemainder']) {
-//                DocumentUtil.traverse(copy) { value, path ->
-//                    if (('subtitle' in path || 'titleRemainder' in path) && value instanceof String && genericSubtitle(value)) {
-//                        new DocumentUtil.Remove()
-//                    }
-//                }
-//            }
-//            copy
-//        }
-//    }
-
-    static List flatTitles(List hasTitle) {
-        dropSubTitles(hasTitle).collect {
-            def title = new TreeMap<>()
-            title['flatTitle'] = normalize(Doc.flatten(it, titleComponents))
-            if (it['@type']) {
-                title['@type'] = it['@type']
-            }
-
-            title
-        }
-    }
-
-//    private static boolean genericSubtitle(String s) {
-//        s = Util.normalize(s)
-//        if (s.startsWith("en ")) {
-//            s = s.substring("en ".length())
-//        }
-//        return s in IGNORED_SUBTITLES
-//    }
-
-    static String normalize(String s) {
-        return Unicode.asciiFold(Unicode.normalizeForSearch(StringUtils.normalizeSpace(" $s ".toLowerCase().replace(noise))))
-    }
-
-    static Object getPathSafe(item, path, defaultTo = null) {
-        for (p in path) {
-            if ((item instanceof Collection || item instanceof Map) && item[p] != null) {
-                item = item[p]
-            } else {
-                return defaultTo
-            }
-        }
-        return item
-    }
-
-
-    static List<String> getTitleVariants(List hasTitle) {
-        flatTitles(hasTitle)
-                .grep { it['@type'] in titleVariant }
-                .collect { it['flatTitle'] }
-    }
-
-    static String chipString(def thing, Whelk whelk) {
-        if (thing instanceof Integer) {
-            return thing
-        }
-
-        def chips = whelk.jsonld.toChip(thing)
-        if (chips.size() < 2) {
-            chips = thing
-        }
-        if (chips instanceof List) {
-            return chips.collect { valuesString(it) }.sort().join('<br>')
-        }
-        return valuesString(chips)
-    }
-
-    private static String valuesString(def thing) {
-        if (thing instanceof List) {
-            return thing.collect { valuesString(it) }.join(' • ')
-        }
-        if (thing instanceof Map) {
-            return thing.findAll { k, v -> k != '@type' }.values().collect { valuesString(it) }.join(' • ')
-        }
-        return thing.toString()
-    }
-
-    // (docs on some of these levels are normally filtered out before we reach here)
-    static List bestEncodingLevel = [
-            'marc:FullLevel',
-            'marc:FullLevelMaterialNotExamined',
-            'marc:MinimalLevel',
-            'marc:LessThanFullLevelMaterialNotExamined',
-            'marc:CoreLevel',
-            'marc:AbbreviatedLevel',
-            'marc:PartialPreliminaryLevel',
-            'marc:PrepublicationLevel',
-            null
-    ]
-
-    // Return the most common title for the best encodingLevel
-    static Object bestTitle(Collection<Doc> docs) {
-        def isTitle = { it.'@type' == 'Title' }
-        def addSource = { t, d -> t.plus(['source': [d.getMainEntity().subMap('@id')]]) }
-
-        for (def level : bestEncodingLevel) {
-            def titles = docs
-                    .findAll { it.encodingLevel() == level }
-                    .collect { d ->
-                        d.getWork().get('hasTitle')?.findAll(isTitle)
-                                ?: d.getMainEntity().get('hasTitle')?.findResults { isTitle(it) ? addSource(it, d) : null }
-                    }
-                    .grep()
-
-            if (!titles) {
-                continue
-            }
-
-            titles = titles.collect(Util.&dropSubTitles)
-            return partition(titles, { a, b -> a == b }).sort { it.size() }.reverse().first().first()
-        }
-
-        return null
-    }
-
-    static Map<String, List<Tuple2<Relator, Boolean>>> parseRespStatement(String respStatement) {
-        def parsedContributions = [:]
-
-        respStatement.split(';').eachWithIndex { part, i ->
-            // TODO: generalize for other material types
-            parseSwedishFictionContribution(StringUtils.normalizeSpace(part), i == 0).each { name, roles ->
-                parsedContributions
-                        .computeIfAbsent(name, r -> [])
-                        .addAll(roles)
-            }
-        }
-
-        return parsedContributions
-    }
-
-    private static Map<String, List<Tuple2<Relator, Boolean>>> parseSwedishFictionContribution(String contribution, boolean isFirstPart) {
-        def roleToPattern =
-                [
-                        (Relator.TRANSLATOR)         : ~/(bemynd(\w+|\.)? )?öf?v(\.|ers(\.|\p{L}+)?)( (till|från) \p{L}+)?|(till svenskan?|från \p{L}+)|svensk text/,
-                        (Relator.AUTHOR)             : ~/^(text(e[nr])?|skriven|written)/,
-                        (Relator.ILLUSTRATOR)        : ~/\bbild(er)?|ill(\.|ustr(\.|\w+)?)|\bvi(gn|nj)ett(er|ill)?|ritad/,
-                        (Relator.AUTHOR_OF_INTRO)    : ~/förord|inl(edn(\.|ing)|edd)/,
-                        (Relator.COVER_DESIGNER)     : ~/omslag/,
-                        (Relator.AUTHOR_OF_AFTERWORD): ~/efter(ord|skrift)/,
-                        (Relator.PHOTOGRAPHER)       : ~/\bfoto\w*\.?/,
-                        (Relator.EDITOR)             : ~/red(\.(?! av)|aktör(er)?)|\bbearb(\.|\w+)?|återberättad|sammanställ\w*/,
-                ]
-
-        def rolePattern = ~/((?iu)${roleToPattern.values().join('|')})/
-        def followsRolePattern = ~/(:| a[fv]| by) /
-        def initialPattern = ~/\p{Lu}/
-        def namePattern = ~/\p{Lu}:?\p{Ll}+('\p{Ll})?(,? [Jj](r|unior))?/
-        def betweenNamesPattern = ~/-| |\. ?| (de(l| la)?|von|van( de[nr])?|v\.|le|af|du|dos) | [ODdLl]'/
-        def fullNamePattern = ~/(($initialPattern|$namePattern)($betweenNamesPattern)?)*$namePattern/
-        def conjPattern = ~/ (och|&|and) /
-        def roleAfterNamePattern = ~/( ?\(($rolePattern$conjPattern)?$rolePattern\))/
-        def fullContributionPattern = ~/(($rolePattern($conjPattern|\/))*$rolePattern$followsRolePattern)?$fullNamePattern($conjPattern$fullNamePattern)*$roleAfterNamePattern?/
-
-        // Make roles lower case so that they can't be mistaken for names
-        contribution = (contribution =~ rolePattern)*.first()
-                .collectEntries { [it, it.toLowerCase()] }
-                .with { contribution.replace(it) }
-
-        def nameToRoles = [:]
-
-        def matched = (contribution =~ fullContributionPattern)*.first()
-
-        matched.each { m ->
-            // Extract roles from the contribution
-            def roles = roleToPattern
-                    .findAll { k, v -> m =~ /(?iu)$v/ }
-                    .with {
-                        it.isEmpty() && contribution =~ /.+$followsRolePattern/
-                                ? [new Tuple2(Relator.UNSPECIFIED_CONTRIBUTOR, isFirstPart)]
-                                : it.collect { role, pattern -> new Tuple2(role, isFirstPart) }
-                    }
-
-            // Author should be the role if first part of respStatement (before ';') and no role seems to be stated
-            if (roles.isEmpty() && isFirstPart) {
-                roles << new Tuple2(Relator.AUTHOR, isFirstPart)
-            }
-
-            // Extract names from the contribution
-            def names = parseNames(fullNamePattern, conjPattern, m)
-
-            // Assign the roles to each name
-            nameToRoles.putAll(names.collectEntries { [it, roles] })
-        }
-
-        return nameToRoles
-    }
-
-    private static List<String> parseNames(Pattern namePattern, Pattern conjPattern, String s) {
-        def names = []
-
-        (s =~ namePattern).each {
-            def name = it.first()
-            // Handle the case of "Jan och Maria Larsson"
-            def previousName = names.isEmpty() ? null : names.last()
-            if (previousName?.split()?.size() == 1 && s =~ /$previousName$conjPattern$name/) {
-                def nameParts = name.split()
-                if (nameParts.size() > 1) {
-                    names[-1] += " ${nameParts.last()}"
-                }
-            }
-            names << name
-        }
-
-        return names
-    }
-}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/WorkComparator.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/WorkComparator.groovy
deleted file mode 100644
index faa369e9d9..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/WorkComparator.groovy
+++ /dev/null
@@ -1,136 +0,0 @@
-package whelk.WorkMerging
-
-import whelk.WorkMerging.compare.Classification
-import whelk.WorkMerging.compare.SameOrEmpty
-import whelk.WorkMerging.compare.Default
-import whelk.WorkMerging.compare.Extent
-import whelk.WorkMerging.compare.FieldHandler
-import whelk.WorkMerging.compare.GenreForm
-import whelk.WorkMerging.compare.StuffSet
-import whelk.WorkMerging.compare.Subject
-import whelk.WorkMerging.compare.TranslationOf
-import whelk.WorkMerging.compare.ValuePicker
-import whelk.WorkMerging.compare.WorkTitle
-
-import static whelk.WorkMerging.Util.bestTitle
-
-class WorkComparator {
-    Set<String> fields
-    DocumentComparator c = new DocumentComparator()
-
-    Map<String, FieldHandler> comparators = [
-            'classification'  : new Classification(),
-            'contentType'     : new SameOrEmpty('https://id.kb.se/term/rda/Text'),
-            'genreForm'       : new GenreForm(),
-            'hasTitle'        : new WorkTitle(),
-            'intendedAudience': new SameOrEmpty('https://id.kb.se/marc/Juvenile'),
-            '_numPages'       : new Extent(),
-            'subject'         : new Subject(),
-            'summary'         : new StuffSet(),
-            'translationOf'   : new TranslationOf(),
-    ]
-
-    static FieldHandler DEFAULT = new Default()
-
-    WorkComparator(Set<String> fields) {
-        this.fields = new HashSet<>(fields)
-    }
-
-    boolean sameWork(Doc a, Doc b) {
-        fields.every { compare(a, b, it).with { it == EQUAL || it == COMPATIBLE } }
-    }
-
-    FieldStatus compare(Doc a, Doc b, String field) {
-        Object oa = a.getWork().get(field)
-        Object ob = b.getWork().get(field)
-
-        if (oa == null && ob == null) {
-            return FieldStatus.EQUAL
-        }
-
-        compareExact(oa, ob, field) == FieldStatus.EQUAL
-                ? FieldStatus.EQUAL
-                : compareDiff(a, b, field)
-    }
-
-    Map merge(Collection<Doc> docs) {
-        Map result = [:]
-
-        if (docs.size() > 1) {
-            fields.each { field ->
-                FieldHandler h = comparators.getOrDefault(field, DEFAULT)
-                def value = h instanceof ValuePicker
-                        ? h.pick(docs)
-                        : mergeField(field, h, docs)
-
-                if (value) {
-                    result[field] = value
-                }
-            }
-        } else {
-            result = docs[0].workCopy()
-        }
-
-        if (!result['hasTitle']) {
-            def bestTitle = bestTitle(docs)
-            if (bestTitle) {
-                result['hasTitle'] = bestTitle
-            }
-        }
-
-        return result
-    }
-
-    // TODO: preserve order? e.g. subject
-    private Object mergeField(String field, FieldHandler h, Collection<Doc> docs) {
-        Object value = docs.first().getWork().get(field)
-        def rest = docs.drop(1)
-        rest.each {
-            value = h.merge(value, it.getWork().get(field))
-        }
-        return value
-    }
-
-    private FieldStatus compareDiff(Doc a, Doc b, String field) {
-        comparators.getOrDefault(field, DEFAULT).isCompatible(a.getWork().get(field), b.getWork().get(field))
-                ? FieldStatus.COMPATIBLE
-                : FieldStatus.DIFF
-    }
-
-    private FieldStatus compareExact(Object oa, Object ob, String field) {
-        c.isEqual([(field): oa], [(field): ob]) ? FieldStatus.EQUAL : FieldStatus.DIFF
-    }
-
-    static Map<FieldStatus, List<String>> compare(Collection<Doc> cluster) {
-        WorkComparator c = new WorkComparator(allFields(cluster))
-
-        Map<FieldStatus, List<String>> result = [:]
-        c.fieldStatuses(cluster).each { f, s -> result.get(s, []) << f }
-        return result
-    }
-
-    static Set<String> allFields(Collection<Doc> cluster) {
-        Set<String> fields = new HashSet<>()
-        cluster.each { fields.addAll(it.getWork().keySet()) }
-        return fields - 'summary' // - 'summary' only temporary, remove when summaries have been moved to instance (LXL-3303)
-    }
-
-    Map<String, FieldStatus> fieldStatuses(Collection<Doc> cluster) {
-        fields.collectEntries { [it, fieldStatus(cluster, it)] }
-    }
-
-    FieldStatus fieldStatus(Collection<Doc> cluster, String field) {
-        boolean anyCompat = false
-        [cluster, cluster].combinations().findResult { List combination ->
-            Doc a = combination.first()
-            Doc b = combination.last()
-
-            def c = compare(a, b, field)
-            if (c == FieldStatus.COMPATIBLE) {
-                anyCompat = true
-            }
-            c == FieldStatus.DIFF ? c : null
-        } ?: (anyCompat ? FieldStatus.COMPATIBLE : FieldStatus.EQUAL)
-    }
-
-}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/WorkToolJob.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/WorkToolJob.groovy
deleted file mode 100644
index d30deeb9ca..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/WorkToolJob.groovy
+++ /dev/null
@@ -1,797 +0,0 @@
-package whelk.WorkMerging
-
-
-import whelk.Document
-import whelk.IdGenerator
-import whelk.JsonLd
-import whelk.Whelk
-import whelk.exception.WhelkRuntimeException
-import whelk.util.LegacyIntegrationTools
-import whelk.util.Statistics
-
-import java.text.SimpleDateFormat
-import java.util.concurrent.ExecutorService
-import java.util.concurrent.Executors
-import java.util.concurrent.TimeUnit
-import java.util.concurrent.atomic.AtomicInteger
-import java.util.function.Function
-
-import static whelk.WorkMerging.FieldStatus.DIFF
-
-import static whelk.WorkMerging.Util.asList
-import static whelk.WorkMerging.Util.chipString
-import static whelk.WorkMerging.Util.getPathSafe
-import static whelk.WorkMerging.Util.normalize
-import static whelk.WorkMerging.Util.partition
-import static whelk.WorkMerging.Util.parseRespStatement
-import static whelk.WorkMerging.Util.Relator
-
-class WorkToolJob {
-    Whelk whelk
-    Statistics statistics
-    File clusters
-
-    String date = new SimpleDateFormat('yyyyMMdd-HHmmss').format(new Date())
-    String jobId = IdGenerator.generate()
-    File reportDir = new File("reports/$date/merged-works")
-
-    String changedIn = "xl"
-    String changedBy = "SEK"
-    String generationProcess = 'https://libris.kb.se/sys/merge-works'
-    boolean dryRun = true
-    boolean skipIndex = false
-    boolean loud = false
-    boolean verbose = false
-
-    WorkToolJob(File clusters) {
-        this.clusters = clusters
-
-        this.whelk = Whelk.createLoadedSearchWhelk('secret', true)
-        this.statistics = new Statistics()
-    }
-
-    public static Closure qualityMonographs = { Doc doc ->
-        (doc.isText()
-                && doc.isMonograph()
-                && !doc.hasPart()
-                && (doc.encodingLevel() != 'marc:PartialPreliminaryLevel' && doc.encodingLevel() != 'marc:PrepublicationLevel'))
-                && !doc.hasRelationshipWithContribution()
-    }
-
-    void show() {
-        println(Html.START)
-        run({ cluster ->
-            return {
-                try {
-                    Collection<Collection<Doc>> docs = titleClusters(cluster)
-
-                    if (docs.isEmpty() || docs.size() == 1 && docs.first().size() == 1) {
-                        return
-                    }
-
-                    println(docs
-                            .collect { it.sort { a, b -> a.getWork()['@type'] <=> b.getWork()['@type'] } }
-                            .collect { it.sort { it.numPages() } }
-                            .collect { Html.clusterTable(it) }
-                            .join('') + Html.HORIZONTAL_RULE
-                    )
-                }
-                catch (NoWorkException e) {
-                    System.err.println(e.getMessage())
-                }
-                catch (Exception e) {
-                    System.err.println(e.getMessage())
-                    e.printStackTrace(System.err)
-                }
-            }
-        })
-        println(Html.END)
-    }
-
-    void showWorks() {
-        println(Html.START)
-        run({ cluster ->
-            return {
-                try {
-                    println(mergedWorks(titleClusters(cluster)).findAll { it.derivedFrom.size() > 1 }
-                            .collect { [new Doc(whelk, it.work)] + it.derivedFrom }
-                            .collect { Html.clusterTable(it) }
-                            .join('') + Html.HORIZONTAL_RULE
-                    )
-                }
-                catch (Exception e) {
-                    System.err.println(e.getMessage())
-                    e.printStackTrace(System.err)
-                }
-            }
-        })
-        println(Html.END)
-    }
-
-    void showHubs() {
-        println(Html.START)
-        run({ cluster ->
-            return {
-                try {
-                    def hub = mergedWorks(titleClusters(cluster))
-                            .collect { [new Doc(whelk, it.work)] + it.derivedFrom }
-                    if (hub.size() > 1) {
-                        println(Html.hubTable(hub) + Html.HORIZONTAL_RULE)
-                    }
-                }
-                catch (Exception e) {
-                    System.err.println(e.getMessage())
-                    e.printStackTrace(System.err)
-                }
-            }
-        })
-        println(Html.END)
-    }
-
-    void merge() {
-        def s = statistics.printOnShutdown()
-        reportDir.mkdirs()
-
-        run({ cluster ->
-            return {
-                def titles = titleClusters(cluster)
-                def works = mergedWorks(titles)
-
-                works.each {
-                    if (it.derivedFrom.size() > 1) {
-                        store(it)
-                    }
-                }
-
-                String report = htmlReport(titles, works)
-
-                new File(reportDir, "${Html.clusterId(cluster)}.html") << report
-                works.each {
-                    s.increment('num derivedFrom', "${it.derivedFrom.size()}", it.work.shortId)
-                    new File(reportDir, "${it.work.shortId}.html") << report
-                }
-            }
-        })
-    }
-
-    void revert() {
-        run({ cluster ->
-            return {
-                def docs = cluster.collect(whelk.&getDocument).grep()
-
-                Set<String> works = []
-
-                docs.each { Document d ->
-                    def sum = d.getChecksum(whelk.jsonld)
-                    works << getPathSafe(d.data, d.workIdPath)
-                    def revertTo = whelk.storage.loadAllVersions(d.shortId)
-                            .reverse()
-                            .find { v -> getPathSafe(v.data, v.workIdPath) == null }
-                    d.data = revertTo.data
-                    d.setGenerationDate(new Date())
-                    d.setGenerationProcess(generationProcess)
-                    whelk.storeAtomicUpdate(d, !loud, changedIn, changedBy, sum)
-                }
-
-                works.grep().each {
-                    def shortId = it.split("[#/]")[-2]
-                    whelk.remove(shortId, changedIn, changedBy)
-                }
-            }
-        })
-    }
-
-    String htmlReport(Collection<Collection<Doc>> titleClusters, Collection<MergedWork> works) {
-        if (titleClusters.isEmpty() || titleClusters.size() == 1 && titleClusters.first().size() == 1) {
-            return ""
-        }
-
-        StringBuilder s = new StringBuilder()
-
-        s.append(Html.START)
-        s.append("<h1>Title cluster(s)</h1>")
-        titleClusters.each { it.each { it.addComparisonProps() } }
-
-        titleClusters
-                .collect { it.sort { a, b -> a.getWork()['@type'] <=> b.getWork()['@type'] } }
-                .collect { it.sort { it.numPages() } }
-                .each {
-                    s.append(Html.clusterTable(it))
-                    s.append(Html.HORIZONTAL_RULE)
-                }
-        titleClusters.each { it.each { it.removeComparisonProps() } }
-
-        s.append("<h1>Extracted works</h1>")
-        works.collect { [new Doc(whelk, it.work)] + it.derivedFrom }
-                .each { s.append(Html.clusterTable(it)) }
-
-        s.append(Html.END)
-        return s.toString()
-    }
-
-    class MergedWork {
-        Document work
-        Collection<Doc> derivedFrom
-    }
-
-    private Document buildWorkDocument(Map workData) {
-        String workId = IdGenerator.generate()
-
-        workData['@id'] = "TEMPID#it"
-        Document d = new Document([
-                "@graph": [
-                        [
-                                "@id"          : "TEMPID",
-                                "@type"        : "Record",
-                                "mainEntity"   : ["@id": "TEMPID#it"],
-                                "technicalNote": [[
-                                                          "@type"  : "TechnicalNote",
-                                                          "hasNote": [[
-                                                                              "@type": "Note",
-                                                                              "label": ["Maskinellt utbrutet verk... TODO"]
-                                                                      ]],
-                                                          "uri"    : ["http://xlbuild.libris.kb.se/works/$date/merged-works/${workId}.html".toString()]
-
-                                                  ]
-                                ]],
-                        workData
-                ]
-        ])
-
-        d.setGenerationDate(new Date())
-        d.setGenerationProcess(generationProcess)
-        d.deepReplaceId(Document.BASE_URI.toString() + workId)
-        return d
-    }
-
-    private void store(MergedWork work) {
-        if (!dryRun) {
-            whelk.setSkipIndex(skipIndex)
-            if (!whelk.createDocument(work.work, changedIn, changedBy,
-                    LegacyIntegrationTools.determineLegacyCollection(work.work, whelk.getJsonld()), false)) {
-                throw new WhelkRuntimeException("Could not store new work: ${work.work.shortId}")
-            }
-
-            String workIri = work.work.thingIdentifiers.first()
-
-            work.derivedFrom
-                    .collect { it.ogDoc }
-                    .each {
-                        def sum = it.getChecksum(whelk.jsonld)
-                        it.data[JsonLd.GRAPH_KEY][1]['instanceOf'] = [(JsonLd.ID_KEY): workIri]
-                        it.setGenerationDate(new Date())
-                        it.setGenerationProcess(generationProcess)
-                        whelk.storeAtomicUpdate(it, !loud, changedIn, changedBy, sum)
-                    }
-        }
-    }
-
-    private Collection<MergedWork> mergedWorks(Collection<Collection> titleClusters) {
-        def works = []
-        titleClusters.each { titleCluster ->
-            titleCluster.sort { it.numPages() }
-            WorkComparator c = new WorkComparator(WorkComparator.allFields(titleCluster))
-
-            works.addAll(partition(titleCluster, { Doc a, Doc b -> c.sameWork(a, b) })
-                    .each { work -> work.each { doc -> doc.removeComparisonProps() } }
-                    .collect { new MergedWork(work: buildWorkDocument(c.merge(it)), derivedFrom: it) })
-        }
-
-        return works
-    }
-
-
-    void subTitles() {
-        statistics.printOnShutdown(10)
-        run({ cluster ->
-            return {
-                String titles = cluster.collect(whelk.&getDocument).collect {
-                    getPathSafe(it.data, ['@graph', 1, 'hasTitle', 0, 'subtitle'])
-                }.grep().join('\n')
-
-                if (!titles.isBlank()) {
-                    println(titles + '\n')
-                }
-            }
-        })
-    }
-
-    void printInstanceValue(String field) {
-        run({ cluster ->
-            return {
-                String values = cluster.collect(whelk.&getDocument).collect {
-                    "${it.shortId}\t${getPathSafe(it.data, ['@graph', 1, field])}"
-                }.join('\n')
-
-                println(values + '\n')
-            }
-        })
-    }
-
-    void fictionNotFiction() {
-        run({ cluster ->
-            return {
-                Collection<Collection<Doc>> titleClusters = titleClusters(cluster)
-
-                for (titleCluster in titleClusters) {
-                    if (titleCluster.size() > 1) {
-                        def statuses = WorkComparator.compare(cluster)
-                        if (!statuses[DIFF].contains('contribution')) {
-                            String gf = titleCluster.collect { it.getDisplayText('genreForm') }.join(' ')
-                            if (gf.contains('marc/FictionNotFurtherSpecified') && gf.contains('marc/NotFictionNotFurtherSpecified')) {
-                                println(titleCluster.collect { it.getDoc().shortId }.join('\t'))
-                            }
-                        }
-                    }
-                }
-            }
-        })
-    }
-
-    void swedishFiction() {
-        def swedish = { Doc doc ->
-            Util.asList(doc.getWork()['language']).collect { it['@id'] } == ['https://id.kb.se/language/swe']
-        }
-
-        run({ cluster ->
-            return {
-                def c = loadDocs(cluster)
-                        .findAll(qualityMonographs)
-                        .findAll(swedish)
-                        .findAll { d -> !d.isDrama() }
-
-                if (c.any { it.isFiction() } && !c.any { it.isNotFiction() }) {
-                    println(c.collect { it.doc.shortId }.join('\t'))
-                }
-            }
-        })
-    }
-
-    void filterClusters(Closure<Collection<Doc>> predicate) {
-        run({ cluster ->
-            return {
-                if (predicate(loadDocs(cluster))) {
-                    println(cluster.join('\t'))
-                }
-            }
-        })
-    }
-
-    void filterDocs(Closure<Doc> predicate) {
-        run({ cluster ->
-            return {
-                def c = loadDocs(cluster).findAll(predicate)
-                if (c.size() > 0) {
-                    println(c.collect { it.doc.shortId }.join('\t'))
-                }
-            }
-        })
-    }
-
-    void translationNoTranslator() {
-        run({ cluster ->
-            return {
-                def c = loadDocs(cluster)
-
-                if (c) {
-                    if (c.any { it.isTranslation() }) {
-                        if (c.any { it.hasTranslator() }) {
-                            c = c.findAll { !it.isTranslationWithoutTranslator() }
-                        } else {
-                            int pages = c.first().numPages()
-                            if (c.any { it.numPages() != pages }) {
-                                return // drop cluster
-                            }
-                        }
-                    }
-                }
-
-                if (c.size() > 0) {
-                    println(c.collect { it.doc.shortId }.join('\t'))
-                }
-            }
-        })
-    }
-
-    void outputTitleClusters() {
-        run({ cluster ->
-            return {
-                titleClusters(cluster).findAll { it.size() > 1 }.each {
-                    println(it.collect { it.doc.shortId }.join('\t'))
-                }
-            }
-        })
-    }
-
-    void add9pu() {
-        statistics.printOnShutdown()
-        run({ cluster ->
-            return {
-                statistics.increment('add 9pu', 'clusters checked')
-                def docs = cluster
-                        .collect(whelk.&getDocument)
-                        .findAll()
-                        .collect { [doc: it, checksum: it.getChecksum(whelk.jsonld), changed: false] }
-
-                def ill = ['@id': Relator.ILLUSTRATOR.iri]
-                def pu = ['@id': Relator.PRIMARY_RIGHTS_HOLDER.iri]
-                def path = ['@graph', 1, 'instanceOf', 'contribution']
-
-                docs.each {
-                    Document d = it.doc
-
-                    statistics.increment('add 9pu', 'docs checked')
-
-                    getPathSafe(d.data, path, []).each { Map c ->
-                        def r = asList(c.role)
-
-                        if (pu in r || !(ill in r) || c.'@type' == 'PrimaryContribution')
-                            return
-
-                        for (Map other : docs) {
-                            Document od = other.doc
-
-                            def found9pu = false
-
-                            getPathSafe(od.data, path, []).each { Map oc ->
-                                if (asList(c.agent) == asList(oc.agent) && asList(oc.role).containsAll([ill, pu])) {
-                                    c.role = asList(c.role) + pu
-                                    found9pu = true
-                                    statistics.increment('add 9pu', "9pu added")
-                                    if (verbose) {
-                                        println("${d.shortId} <- ${od.shortId}")
-                                    }
-                                    return
-                                }
-                            }
-
-                            if (found9pu) {
-                                println(c)
-                                it.changed = true
-                                break
-                            }
-                        }
-                    }
-                }
-
-                docs.each {
-                    if (!dryRun && it.changed) {
-                        Document d = it.doc
-                        d.setGenerationDate(new Date())
-                        d.setGenerationProcess(generationProcess)
-                        whelk.storeAtomicUpdate(d, !loud, changedIn, changedBy, it.checksum)
-                    }
-                }
-            }
-        })
-    }
-
-    void fetchContributionFromRespStatement() {
-        def loadThingByIri = { String iri ->
-            // TODO: fix whelk, add load by IRI method
-            whelk.storage.loadDocumentByMainId(iri)?.with { doc ->
-                return (Map) doc.data['@graph'][1]
-            }
-        }
-
-        def loadIfLink = { it['@id'] ? loadThingByIri(it['@id']) : it }
-
-        statistics.printOnShutdown()
-        run({ cluster ->
-            return {
-                statistics.increment('fetch contribution from respStatement', 'clusters checked')
-                def docs = cluster
-                        .collect(whelk.&getDocument)
-                        .findAll()
-                        .collect { [doc: it, checksum: it.getChecksum(whelk.jsonld), changed: false] }
-
-                docs.each {
-                    Document d = it.doc
-                    def respStatement = getPathSafe(d.data, ['@graph', 1, 'responsibilityStatement'])
-                    if (!respStatement)
-                        return
-
-                    statistics.increment('fetch contribution from respStatement', 'docs checked')
-
-                    def contributionsInRespStmt = parseRespStatement(respStatement)
-                    def contribution = getPathSafe(d.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
-
-                    contribution.each { Map c ->
-                        asList(c.agent).each { a ->
-                            def matchedOnName = contributionsInRespStmt.find { n, r ->
-                                nameMatch(n, loadIfLink(a))
-                            }
-
-                            if (!matchedOnName)
-                                return
-
-                            // Contributor found locally, omit from further search
-                            contributionsInRespStmt.remove(matchedOnName.key)
-
-                            def dontAdd = { Relator relator, boolean isFirstStmtPart ->
-                                relator == Relator.UNSPECIFIED_CONTRIBUTOR
-                                        || isFirstStmtPart && relator == Relator.AUTHOR
-                                        && c.'@type' != 'PrimaryContribution'
-                            }
-
-                            def rolesInRespStatement = matchedOnName.value
-                                    .findResults { dontAdd(it) ? null : it.getV1() }
-
-                            if (rolesInRespStatement.isEmpty())
-                                return
-
-                            def rolesInContribution = asList(c.role).findAll { it.'@id' != Relator.UNSPECIFIED_CONTRIBUTOR.iri }
-
-                            // Replace Adapter with Editor
-                            it.changed |= rolesInRespStatement.removeAll { r ->
-                                r == Relator.EDITOR && rolesInContribution.findIndexOf {
-                                    it.'@id' == Relator.ADAPTER.iri
-                                }.with {
-                                    if (it == -1) {
-                                        return false
-                                    } else {
-                                        rolesInContribution[it]['@id'] = Relator.EDITOR.iri
-                                        return true
-                                    }
-                                }
-                            }
-
-                            if (rolesInRespStatement.size() <= rolesInContribution.size())
-                                return
-
-                            rolesInRespStatement.each { r ->
-                                def idLink = ['@id': r.iri]
-                                if (!(idLink in rolesInContribution)) {
-                                    rolesInContribution << idLink
-                                    it.changed = true
-                                    def roleShort = r.iri.split('/').last()
-                                    statistics.increment('fetch contribution from respStatement', "$roleShort roles specified")
-                                    if (verbose) {
-                                        println("${chipString(c, whelk)} (${d.shortId}) <- $roleShort")
-                                    }
-                                }
-                            }
-
-                            c.role = rolesInContribution
-                        }
-                    }
-
-                    def comparable = {
-                        it*.getV1().findResults { Relator r ->
-                            r != Relator.UNSPECIFIED_CONTRIBUTOR
-                                    ? ['@id': r.iri]
-                                    : null
-                        }
-                    }
-
-                    contributionsInRespStmt.each { name, roles ->
-                        for (Map other : docs) {
-                            Document od = other.doc
-                            def matched = getPathSafe(od.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
-                                    .find { Map c ->
-                                        asList(c.agent).any { a ->
-                                            loadIfLink(a).with { nameMatch(name, it) && !(it.description =~ /(?i)pseud/) }
-                                                    && comparable(roles).with { r -> !r.isEmpty() && asList(c.role).containsAll(r) }
-                                                    && Util.bestEncodingLevel.indexOf(d.getEncodingLevel()) <= Util.bestEncodingLevel.indexOf(od.getEncodingLevel())
-                                        }
-                                    }
-                            if (matched) {
-                                contribution << matched
-                                roles.each {
-                                    def roleShort = it.getV1().iri.split('/').last()
-                                    statistics.increment('fetch contribution from respStatement', "$roleShort found in cluster")
-                                }
-                                if (verbose) {
-                                    println("${d.shortId} <- ${chipString(matched, whelk)} (${od.shortId})")
-                                }
-                                it.changed = true
-                                break
-                            }
-                        }
-                    }
-                }
-
-                docs.each {
-                    if (!dryRun && it.changed) {
-                        Document d = it.doc
-                        d.setGenerationDate(new Date())
-                        d.setGenerationProcess(generationProcess)
-                        whelk.storeAtomicUpdate(d, !loud, changedIn, changedBy, it.checksum)
-                    }
-                }
-            }
-        }
-
-        )
-    }
-
-    void linkContribution() {
-        def loadThingByIri = { String iri ->
-            // TODO: fix whelk, add load by IRI method
-            whelk.storage.loadDocumentByMainId(iri)?.with { doc ->
-                return (Map) doc.data['@graph'][1]
-            }
-        }
-
-        def loadIfLink = { it['@id'] ? loadThingByIri(it['@id']) : it }
-
-        statistics.printOnShutdown()
-        run({ cluster ->
-            return {
-                statistics.increment('link contribution', 'clusters checked')
-                // TODO: check work language?
-                def docs = cluster
-                        .collect(whelk.&getDocument)
-                        .collect { [doc: it, checksum: it.getChecksum(whelk.jsonld), changed: false] }
-
-                List<Map> linked = []
-                docs.each { d ->
-                    def contribution = getPathSafe(d.doc.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
-                    contribution.each { Map c ->
-                        if (c.agent && c.agent['@id']) {
-                            loadThingByIri(c.agent['@id'])?.with { Map agent ->
-                                agent.roles = asList(c.role)
-                                linked << agent
-                            }
-                        }
-                    }
-                    statistics.increment('link contribution', 'docs checked')
-                }
-
-                docs.each {
-                    Document d = it.doc
-                    def contribution = getPathSafe(d.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
-                    contribution.each { Map c ->
-                        if (c.agent && !c.agent['@id']) {
-                            def l = linked.find {
-                                agentMatches(c.agent, it) && (!c.role || it.roles.containsAll(c.role))
-                            }
-                            if (l) {
-                                println("${d.shortId} ${chipString(c, whelk)} --> ${chipString(l, whelk)}")
-                                c.agent = ['@id': l['@id']]
-                                it.changed = true
-                                statistics.increment('link contribution', 'agents linked')
-                            } else if (verbose) {
-                                println("${d.shortId} NO MATCH: ${chipString(c, whelk)} ??? ${linked.collect { chipString(it, whelk) }}")
-                            }
-                        }
-                    }
-                }
-
-                List<Map> primaryAutAgents = []
-                docs.each {
-                    def contribution = getPathSafe(it.doc.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
-                    def p = contribution.findAll()
-                    contribution.each {
-                        if (it['@type'] == 'PrimaryContribution' && it['role'] == ['@id': 'https://id.kb.se/relator/author'] && it['agent']) {
-                            Map agent = loadIfLink(it['agent'])
-                            if (agent) {
-                                primaryAutAgents << agent
-                            }
-                        }
-                    }
-                }
-
-                docs.each {
-                    Document d = it.doc
-                    def contribution = getPathSafe(d.data, ['@graph', 1, 'instanceOf', 'contribution'], [])
-                    contribution.each { Map c ->
-                        if (c['@type'] == 'PrimaryContribution' && !c.role) {
-                            if (c.agent) {
-                                def agent = loadIfLink(c.agent)
-                                if (primaryAutAgents.any { agentMatches(agent, it) }) {
-                                    c.role = ['@id': 'https://id.kb.se/relator/author']
-                                    it.changed = true
-                                    statistics.increment('link contribution', 'author role added to primary contribution')
-                                }
-                            }
-                        }
-                    }
-                }
-
-                docs.each {
-                    if (!dryRun && it.changed) {
-                        Document d = it.doc
-                        d.setGenerationDate(new Date())
-                        d.setGenerationProcess(generationProcess)
-                        whelk.storeAtomicUpdate(d, !loud, changedIn, changedBy, it.checksum)
-                    }
-                }
-            }
-        })
-    }
-
-    static boolean agentMatches(Map local, Map linked) {
-        nameMatch(local, linked) && !yearMismatch(local, linked)
-    }
-
-    static boolean nameMatch(Object local, Map agent) {
-        def variants = [agent] + asList(agent.hasVariant)
-        def name = {
-            Map p ->
-                (p.givenName && p.familyName)
-                        ? normalize("${p.givenName} ${p.familyName}")
-                        : p.name ? normalize("${p.name}") : null
-        }
-
-        def localName = local instanceof Map ? name(local) : normalize(local)
-
-        localName && variants.any {
-            name(it) && localName == name(it)
-        }
-    }
-
-    static boolean yearMismatch(Map local, Map linked) {
-        def birth = { Map p -> p.lifeSpan?.with { (it.replaceAll(/[^\-0-9]/, '').split('-') as List)[0] } }
-        def death = { Map p -> p.lifeSpan?.with { (it.replaceAll(/[^\-0-9]/, '').split('-') as List)[1] } }
-        def b = birth(local) && birth(linked) && birth(local) != birth(linked)
-        def d = death(local) && death(linked) && death(local) != death(linked)
-        b || d
-    }
-
-    private void run(Function<List<String>, Runnable> f) {
-        ExecutorService s = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 4)
-
-        AtomicInteger i = new AtomicInteger()
-        clusters.eachLine() {
-            List<String> cluster = Arrays.asList(it.split(/[\t ]+/))
-
-            s.submit({
-                try {
-                    f.apply(cluster).run()
-                    int n = i.incrementAndGet()
-                    if (n % 100 == 0) {
-                        System.err.println("$n")
-                    }
-                }
-                catch (NoWorkException e) {
-                    //println("No work:" + e.getMessage())
-                }
-                catch (Exception e) {
-                    e.printStackTrace()
-                }
-            })
-        }
-
-        s.shutdown()
-        s.awaitTermination(1, TimeUnit.DAYS)
-    }
-
-    private Collection<Doc> loadDocs(Collection<String> cluster) {
-        whelk
-                .bulkLoad(cluster).values()
-                .collect { new Doc(whelk, it) }
-    }
-
-    private Collection<Collection<Doc>> titleClusters(Collection<String> cluster) {
-        loadDocs(cluster)
-                .findAll(qualityMonographs)
-                .each { it.addComparisonProps() }
-                .with { partitionByTitle(it) }
-                .findAll { it.size() > 1 }
-                .findAll { !it.any { doc -> doc.hasGenericTitle() } }
-                .sort { a, b -> a.first().mainEntityDisplayTitle() <=> b.first().mainEntityDisplayTitle() }
-    }
-
-    Collection<Collection<Doc>> partitionByTitle(Collection<Doc> docs) {
-        return partition(docs) { Doc a, Doc b ->
-            !a.getTitleVariants().intersect(b.getTitleVariants()).isEmpty()
-        }
-    }
-
-}
-
-class NoWorkException extends RuntimeException {
-    NoWorkException(String msg) {
-        super(msg)
-    }
-}
-
-
-
-
-
-
-
-
-
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Classification.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Classification.groovy
deleted file mode 100644
index 2dd9a10e7e..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Classification.groovy
+++ /dev/null
@@ -1,63 +0,0 @@
-package whelk.WorkMerging.compare
-
-class Classification extends StuffSet {
-    @Override
-    Object merge(Object a, Object b) {
-        return mergeCompatibleElements(super.merge(a, b)) { c1, c2 ->
-            String code1 = c1['code']
-            String code2 = c2['code']
-            if (!code1 || !code2) {
-                return
-            }
-            code1 = code1.trim()
-            code2 = code2.trim()
-
-            if (isSab(c1) && isSab(c2) && (code1.startsWith(code2) || code2.startsWith(code1))) {
-                def result = [
-                        '@type' : 'Classification',
-                        'code'  : code1.size() > code2.size() ? code1 : code2,
-                        inScheme: [
-                                '@type'  : 'ConceptScheme',
-                                'code'   : 'kssb'
-                        ]
-                ]
-                def version = maxSabVersion(c1, c2)
-                if (version) {
-                    result['inScheme']['version'] = version
-                }
-                return result
-            }
-            else if (isDewey(c1) && isDewey(c2) && code1 == code2) {
-                Map result = [:]
-                result.putAll(c1)
-                result.putAll(c2)
-                result['editionEnumeration'] = maxDeweyEdition(c1, c2)
-                return result
-            }
-        }
-    }
-
-    boolean isSab(Map c) {
-        c['inScheme'] && c['inScheme']['code'] == 'kssb'
-    }
-
-    String maxSabVersion(c1, c2) {
-        def v1 = c1['inScheme']['version'] ?: "-1"
-        def v2 = c2['inScheme']['version'] ?: "-1"
-        Integer.parseInt(v1) > Integer.parseInt(v2) ? v1 : v2
-    }
-
-    boolean isDewey(Map c) {
-        c['@type'] == 'ClassificationDdc'
-    }
-
-    String maxDeweyEdition(c1, c2) {
-        def v1 = c1['editionEnumeration']
-        def v2 = c2['editionEnumeration']
-        deweyEdition(v1) > deweyEdition(v2) ? v1 : v2
-    }
-
-    int deweyEdition(String edition) {
-        Integer.parseInt((edition ?: "0").replaceAll("[^0-9]", ""))
-    }
-}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Default.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Default.groovy
deleted file mode 100644
index 07e0635234..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Default.groovy
+++ /dev/null
@@ -1,13 +0,0 @@
-package whelk.WorkMerging.compare
-
-class Default implements FieldHandler {
-    @Override
-    boolean isCompatible(Object a, Object b) {
-        return false
-    }
-
-    @Override
-    Object merge(Object a, Object b) {
-        return a
-    }
-}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Extent.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Extent.groovy
deleted file mode 100644
index 078a3fee78..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Extent.groovy
+++ /dev/null
@@ -1,15 +0,0 @@
-package whelk.WorkMerging.compare;
-
-class Extent implements FieldHandler {
-
-    // TODO: allow one side missing extent (-1)?
-    @Override
-    boolean isCompatible(Object a, Object b) {
-        return true // a * 0.7 < b && a * 1.3 > b
-    }
-
-    @Override
-    Object merge(Object a, Object b) {
-        return b; // not part of final work
-    }
-}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/FieldHandler.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/FieldHandler.groovy
deleted file mode 100644
index 22a95fd2a9..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/FieldHandler.groovy
+++ /dev/null
@@ -1,12 +0,0 @@
-package whelk.WorkMerging.compare
-
-import whelk.WorkMerging.Doc
-
-interface FieldHandler {
-    boolean isCompatible(Object a, Object b)
-    Object merge(Object a, Object b)
-}
-
-interface ValuePicker extends FieldHandler {
-    Object pick(Collection<Doc> values)
-}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/GenreForm.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/GenreForm.groovy
deleted file mode 100644
index 690e9353ff..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/GenreForm.groovy
+++ /dev/null
@@ -1,36 +0,0 @@
-package whelk.WorkMerging.compare
-
-import whelk.WorkMerging.DocumentComparator
-
-//FIXME
-class GenreForm extends StuffSet {
-    private static final DocumentComparator c = new DocumentComparator()
-
-    // Terms that will be merged (values precede keys)
-    private static def norm = [
-            (['@id': 'https://id.kb.se/marc/NotFictionNotFurtherSpecified']): [
-                    ['@id': 'https://id.kb.se/marc/FictionNotFurtherSpecified'],
-                    ['@id': 'https://id.kb.se/marc/Autobiography'],
-                    ['@id': 'https://id.kb.se/marc/Biography']
-            ],
-            (['@id': 'https://id.kb.se/marc/FictionNotFurtherSpecified'])   : [
-                    ['@id': 'https://id.kb.se/marc/Poetry'],
-                    ['@id': 'https://id.kb.se/marc/Novel']
-            ],
-    ]
-
-    @Override
-    Object merge(Object a, Object b) {
-        return mergeCompatibleElements(super.merge(a, b).findAll { it.'@id' }) { gf1, gf2 ->
-            if (n(gf1, gf2)) {
-                gf2
-            } else if (n(gf2, gf1)) {
-                gf1
-            }
-        }
-    }
-
-    boolean n(a, b) {
-        norm[a]?.any { it == b || n(it, b) }
-    }
-}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/SameOrEmpty.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/SameOrEmpty.groovy
deleted file mode 100644
index f36f580773..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/SameOrEmpty.groovy
+++ /dev/null
@@ -1,21 +0,0 @@
-package whelk.WorkMerging.compare
-
-import static whelk.WorkMerging.Util.asList
-
-class SameOrEmpty implements FieldHandler {
-    Object link
-
-    SameOrEmpty(String iri) {
-        this.link = [['@id': iri]]
-    }
-
-    @Override
-    boolean isCompatible(Object a, Object b) {
-        (!a && asList(b) == link) || (!b && asList(a) == link)
-    }
-
-    @Override
-    Object merge(Object a, Object b) {
-        return a ?: b
-    }
-}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/StuffSet.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/StuffSet.groovy
deleted file mode 100644
index 92262086f7..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/StuffSet.groovy
+++ /dev/null
@@ -1,38 +0,0 @@
-package whelk.WorkMerging.compare
-
-
-import java.util.function.BiFunction
-
-import static whelk.WorkMerging.Util.asList
-
-class StuffSet implements FieldHandler {
-    @Override
-    boolean isCompatible(Object a, Object b) {
-        true
-    }
-
-    @Override
-    Object merge(Object a, Object b) {
-        return ((asList(a) as Set) + (asList(b) as Set)).collect()
-    }
-
-    static Object mergeCompatibleElements(Object o, BiFunction<Object, Object, Object> s) {
-        boolean changed = false
-        List result = []
-        asList(o).each {
-            def merged = null
-            for (int i = 0 ; i < result.size() ; i++) {
-                merged = s.apply(result[i], it)
-                if (merged) {
-                    result[i] = merged
-                    changed = true
-                    break
-                }
-            }
-            if (merged == null) {
-                result << it
-            }
-        }
-        return changed ? mergeCompatibleElements(result, s) : result
-    }
-}
\ No newline at end of file
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Subject.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Subject.groovy
deleted file mode 100644
index e69fb633e7..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/Subject.groovy
+++ /dev/null
@@ -1,8 +0,0 @@
-package whelk.WorkMerging.compare
-
-class Subject extends StuffSet {
-    @Override
-    Object merge(Object a, Object b) {
-        return super.merge(a, b).findAll { it.'@id' || it.'@type' == 'ComplexSubject' }
-    }
-}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/TranslationOf.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/TranslationOf.groovy
deleted file mode 100644
index 73836e6fee..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/TranslationOf.groovy
+++ /dev/null
@@ -1,22 +0,0 @@
-package whelk.WorkMerging.compare
-
-import whelk.WorkMerging.DocumentComparator
-
-class TranslationOf implements FieldHandler {
-    DocumentComparator c = new DocumentComparator()
-    
-    @Override
-    boolean isCompatible(Object a, Object b) {
-        // @type is sometimes Work, sometimes Text. Should not matter for comparison
-        (!a && !b) || a && b && a instanceof Map && b instanceof Map && c.isEqual(noType(a), noType(b))
-    }
-
-    @Override
-    Object merge(Object a, Object b) {
-        return a // TODO: prefer one @type over another?
-    }
-    
-    Map noType(Map m) {
-        m.findAll { k, v -> k != '@type' }
-    }
-}
diff --git a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/WorkTitle.groovy b/whelk-core/src/main/groovy/whelk/WorkMerging/compare/WorkTitle.groovy
deleted file mode 100644
index 4c948af25a..0000000000
--- a/whelk-core/src/main/groovy/whelk/WorkMerging/compare/WorkTitle.groovy
+++ /dev/null
@@ -1,23 +0,0 @@
-package whelk.WorkMerging.compare
-
-import whelk.WorkMerging.Doc
-import whelk.WorkMerging.Util
-import org.apache.commons.lang3.NotImplementedException
-
-class WorkTitle implements ValuePicker {
-
-    @Override
-    boolean isCompatible(Object a, Object b) {
-        return !a || !b || !Util.getTitleVariants(a).intersect(Util.getTitleVariants(b)).isEmpty()
-    }
-
-    @Override
-    Object merge(Object a, Object b) {
-        throw new NotImplementedException('')
-    }
-    
-    @Override
-    Object pick(Collection<Doc> values) {
-        return Util.bestTitle(values)
-    }
-}