From 4fa033c64c162b7cc3d32eeaa8cccf7daccde96f Mon Sep 17 00:00:00 2001 From: Mike Bryant Date: Fri, 29 Aug 2014 12:40:55 +0100 Subject: [PATCH 1/4] Initial commit of id-relativizing transformer --- .gitignore | 20 ++++ pom.xml | 1 + relativize-identifiers/README.md | 10 ++ relativize-identifiers/pom.xml | 72 ++++++++++++++ .../RelativizeIdentifiers.java | 96 +++++++++++++++++++ .../RelativizeIdentifiersTest.java | 67 +++++++++++++ .../test/resources/absoluteids-hyphens.xml | 37 +++++++ .../src/test/resources/absoluteids-spaces.xml | 37 +++++++ 8 files changed, 340 insertions(+) create mode 100644 .gitignore create mode 100644 relativize-identifiers/README.md create mode 100644 relativize-identifiers/pom.xml create mode 100644 relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java create mode 100644 relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java create mode 100644 relativize-identifiers/src/test/resources/absoluteids-hyphens.xml create mode 100644 relativize-identifiers/src/test/resources/absoluteids-spaces.xml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bd31b8b --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +nb-configuration.xml +nbactions.xml +*.class +*.pyc +*.java.orig +*.xml.orig +*.~lock.*# + +# Package Files # +*.jar +*.war +*.ear + +# Ignore target +**/target/* + +# Intellij +*.iml +.idea + diff --git a/pom.xml b/pom.xml index 549a98c..b5e4680 100644 --- a/pom.xml +++ b/pom.xml @@ -14,6 +14,7 @@ add-language-of-descriptions identify-main-identifier leave-only-one-identifier + relativize-identifiers diff --git a/relativize-identifiers/README.md b/relativize-identifiers/README.md new file mode 100644 index 0000000..51bdb44 --- /dev/null +++ b/relativize-identifiers/README.md @@ -0,0 +1,10 @@ +relativize-identifiers +========================= + +Part of the ehri-ead-preprocessing tools to normalise EAD files before importing into the EHRI 
database. + +precondition: The EAD file has absolute identifiers, where unitids in each c-level include the full ID of their parent unitid +postcondition: The EAD file has relative identifiers. + +usage: +java -jar relativize-identifiers/target/relativize-identifiers-1.0-SNAPSHOT-jar-with-dependencies.jar <ead-file.xml> diff --git a/relativize-identifiers/pom.xml b/relativize-identifiers/pom.xml new file mode 100644 index 0000000..db4d45d --- /dev/null +++ b/relativize-identifiers/pom.xml @@ -0,0 +1,72 @@ + + + 4.0.0 + + ehri-project-preprocess + ead-preprocessing + 1.0 + + ehri-project + relativize-identifiers + 1.0-SNAPSHOT + relativize-identifiers + http://maven.apache.org + + UTF-8 + + + + junit + junit + 4.10 + test + + + + + stax + stax + 1.2.0 + + + stax + stax-api + 1.0.1 + + + org.apache.commons + commons-io + 1.3.2 + + + + + + + + maven-assembly-plugin + + + + eu.ehri.relativize_identifiers.RelativizeIdentifiers + + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + + diff --git a/relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java b/relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java new file mode 100644 index 0000000..30f323c --- /dev/null +++ b/relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java @@ -0,0 +1,96 @@ +package eu.ehri.relativize_identifiers; + +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; + +import java.util.Stack; +import java.util.regex.Pattern; +import javax.xml.parsers.FactoryConfigurationError; +import javax.xml.stream.XMLEventFactory; +import javax.xml.stream.XMLEventReader; +import javax.xml.stream.XMLEventWriter; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.events.Characters; +import
javax.xml.stream.events.XMLEvent; + + +public class RelativizeIdentifiers { + + public final static String SUFFIX = "_relid.xml"; + + static XMLEventFactory eventFactory = XMLEventFactory.newInstance(); + static XMLOutputFactory factory = XMLOutputFactory.newInstance(); + + + public static void main(String[] args) throws XMLStreamException, javax.xml.stream.FactoryConfigurationError, IOException { + String eadfile = args[0]; + String outputfile = eadfile.replace(".xml", SUFFIX); + RelativizeIdentifiers.relativizeIdentifiers(eadfile, new FileWriter(outputfile)); + } + + /** + * precondition: The EAD file has absolute identifiers, where unitids in each c-level + * include the full ID of their parent unitid + * postcondition: The EAD file has relative identifiers. + * + * @param eadfile the name of the ead file + * @throws javax.xml.stream.XMLStreamException + * @throws javax.xml.parsers.FactoryConfigurationError + * @throws java.io.IOException + */ + public static String relativizeIdentifiers(String eadfile, Writer outputWriter) + throws XMLStreamException, FactoryConfigurationError, IOException { + + FileInputStream fileInputStreamEAD = new FileInputStream(eadfile); + XMLEventWriter writer = factory.createXMLEventWriter(outputWriter); + + Stack idStack = new Stack(); + Pattern childPattern = Pattern.compile("c\\d\\d"); + + String thisId; + + XMLEventReader xmlEventReaderEAD = XMLInputFactory.newInstance().createXMLEventReader(fileInputStreamEAD); + while (xmlEventReaderEAD.hasNext()) { + XMLEvent event = xmlEventReaderEAD.nextEvent(); + + if (event.isStartElement()) { + if (event.asStartElement().getName().getLocalPart().equals("unitid")) { + writer.add(event); + XMLEvent nextEvent = xmlEventReaderEAD.nextEvent(); + if (nextEvent.isCharacters()) { + thisId = nextEvent.asCharacters().getData(); + if (!idStack.empty() && thisId.contains(idStack.peek())) { + // Replace the ID and any non-ID trailing chars, such as spaces, + // colons, or dashes. 
+ String regex = "^" + Pattern.quote(idStack.peek()) + "[\\s\\-:_]*"; + String newId = thisId.replaceFirst(regex, ""); + Characters chars = eventFactory.createCharacters(newId); + writer.add(chars); + } else { + writer.add(nextEvent); + } + idStack.push(thisId); + } + } else { + writer.add(event); + } + } else if (event.isEndElement()) { + if (event.asEndElement().getName().getLocalPart() + .matches(childPattern.pattern())) { + idStack.pop(); + } + writer.add(event); + } else { + writer.add(event); + } + } + + writer.close(); + xmlEventReaderEAD.close(); + return null; + } +} diff --git a/relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java b/relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java new file mode 100644 index 0000000..b690c3e --- /dev/null +++ b/relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java @@ -0,0 +1,67 @@ +package eu.ehri.relativize_identifiers; + +import org.junit.Before; +import org.junit.Test; +import org.w3c.dom.Document; +import org.xml.sax.SAXException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.transform.dom.DOMSource; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathFactory; +import java.io.*; +import java.net.URISyntaxException; +import java.net.URL; + +import static org.junit.Assert.assertEquals; + +/** + * @author Mike Bryant (http://github.com/mikesname) + */ +public class RelativizeIdentifiersTest { + + DocumentBuilder builder; + XPath xpath; + + @Before + public void setUp() throws Exception { + builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + xpath = XPathFactory.newInstance().newXPath(); + } + + @Test + public void testRelativizeIdentifiersWithSpaces() throws Exception { + Document outDoc = 
getOutputDocument("/absoluteids-spaces.xml"); + assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/did/unitid").evaluate(outDoc)); + assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/did/unitid").evaluate(outDoc)); + assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/c03/did/unitid").evaluate(outDoc)); + assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/c03/did/unitid").evaluate(outDoc)); + assertEquals("2", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/did/unitid").evaluate(outDoc)); + assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/c03/did/unitid").evaluate(outDoc)); + } + + @Test + public void testRelativizeIdentifiersWithHyphens() throws Exception { + Document outDoc = getOutputDocument("/absoluteids-hyphens.xml"); + assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/did/unitid").evaluate(outDoc)); + assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/did/unitid").evaluate(outDoc)); + assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/c03/did/unitid").evaluate(outDoc)); + assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/c03/did/unitid").evaluate(outDoc)); + assertEquals("2", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/did/unitid").evaluate(outDoc)); + assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/c03/did/unitid").evaluate(outDoc)); + } + + private Document getOutputDocument(String resourceName) throws URISyntaxException, XMLStreamException, + IOException, SAXException { + URL resource = RelativizeIdentifiersTest.class.getResource(resourceName); + String path = new File(resource.toURI()).getAbsolutePath(); + StringWriter stringWriter = new StringWriter(); + RelativizeIdentifiers.relativizeIdentifiers(path, stringWriter); + stringWriter.close(); + ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(stringWriter.getBuffer().toString().getBytes()); + return builder.parse(byteArrayInputStream); + } +} diff --git 
a/relativize-identifiers/src/test/resources/absoluteids-hyphens.xml b/relativize-identifiers/src/test/resources/absoluteids-hyphens.xml new file mode 100644 index 0000000..4e087d6 --- /dev/null +++ b/relativize-identifiers/src/test/resources/absoluteids-hyphens.xml @@ -0,0 +1,37 @@ + + + + + + + root + + + + + root-1 + + + + root-1-1 + + + + root-1-1-1 + + + + + + root-1-2 + + + + root-1-2-1 + + + + + + + \ No newline at end of file diff --git a/relativize-identifiers/src/test/resources/absoluteids-spaces.xml b/relativize-identifiers/src/test/resources/absoluteids-spaces.xml new file mode 100644 index 0000000..64a33e6 --- /dev/null +++ b/relativize-identifiers/src/test/resources/absoluteids-spaces.xml @@ -0,0 +1,37 @@ + + + + + + + root + + + + + root 1 + + + + root 1 1 + + + + root 1 1 1 + + + + + + root 1 2 + + + + root 1 2 1 + + + + + + + \ No newline at end of file From 896542ec260f4794e3762797256d847aaf21c1d0 Mon Sep 17 00:00:00 2001 From: Mike Bryant Date: Fri, 29 Aug 2014 13:39:08 +0100 Subject: [PATCH 2/4] Handle slashes, and add a real-life JMP test case --- .../RelativizeIdentifiers.java | 3 +- .../RelativizeIdentifiersTest.java | 13 +++ .../src/test/resources/absoluteids-spaces.xml | 7 ++ .../src/test/resources/wp2_jmp_ead.xml | 95 +++++++++++++++++++ 4 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 relativize-identifiers/src/test/resources/wp2_jmp_ead.xml diff --git a/relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java b/relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java index 30f323c..d00b718 100644 --- a/relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java +++ b/relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java @@ -66,8 +66,9 @@ public static String relativizeIdentifiers(String eadfile, Writer outputWriter) if (!idStack.empty() && 
thisId.contains(idStack.peek())) { // Replace the ID and any non-ID trailing chars, such as spaces, // colons, or dashes. - String regex = "^" + Pattern.quote(idStack.peek()) + "[\\s\\-:_]*"; + String regex = "^" + Pattern.quote(idStack.peek()) + "[\\s\\-:_\\/]*"; String newId = thisId.replaceFirst(regex, ""); + System.out.println("ID: " + thisId + " -> " + newId); Characters chars = eventFactory.createCharacters(newId); writer.add(chars); } else { diff --git a/relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java b/relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java index b690c3e..de03833 100644 --- a/relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java +++ b/relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java @@ -41,6 +41,7 @@ public void testRelativizeIdentifiersWithSpaces() throws Exception { assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/c03/did/unitid").evaluate(outDoc)); assertEquals("2", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/did/unitid").evaluate(outDoc)); assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/c03/did/unitid").evaluate(outDoc)); + assertEquals("2 root 1 2", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/c03[2]/did/unitid").evaluate(outDoc)); } @Test @@ -54,6 +55,18 @@ public void testRelativizeIdentifiersWithHyphens() throws Exception { assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/c03/did/unitid").evaluate(outDoc)); } + @Test + public void testRelativizeIdentifiersWithSlashes() throws Exception { + Document outDoc = getOutputDocument("/wp2_jmp_ead.xml"); + assertEquals("COLLECTION.JMP.SHOAH/T", xpath.compile("/ead/archdesc/did/unitid").evaluate(outDoc)); + assertEquals("2", xpath.compile("/ead/archdesc/dsc/c01[1]/did/unitid").evaluate(outDoc)); + assertEquals("A", 
xpath.compile("/ead/archdesc/dsc/c01[1]/c02[1]/did/unitid").evaluate(outDoc)); + assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01[1]/c02[1]/c03[1]/did/unitid").evaluate(outDoc)); + assertEquals("a", xpath.compile("/ead/archdesc/dsc/c01[1]/c02[1]/c03[1]/c04[1]/did/unitid").evaluate(outDoc)); + assertEquals("028", xpath.compile("/ead/archdesc/dsc/c01[1]/c02[1]/c03[1]/c04[1]/c05[1]/did/unitid").evaluate + (outDoc)); + } + private Document getOutputDocument(String resourceName) throws URISyntaxException, XMLStreamException, IOException, SAXException { URL resource = RelativizeIdentifiersTest.class.getResource(resourceName); diff --git a/relativize-identifiers/src/test/resources/absoluteids-spaces.xml b/relativize-identifiers/src/test/resources/absoluteids-spaces.xml index 64a33e6..a80b2ef 100644 --- a/relativize-identifiers/src/test/resources/absoluteids-spaces.xml +++ b/relativize-identifiers/src/test/resources/absoluteids-spaces.xml @@ -30,6 +30,13 @@ root 1 2 1 + + + + root 1 2 2 root 1 2 + + diff --git a/relativize-identifiers/src/test/resources/wp2_jmp_ead.xml b/relativize-identifiers/src/test/resources/wp2_jmp_ead.xml new file mode 100644 index 0000000..a46bf1d --- /dev/null +++ b/relativize-identifiers/src/test/resources/wp2_jmp_ead.xml @@ -0,0 +1,95 @@ + + + + + + COLLECTION.JMP.SHOAH/T + + + Terezín/Theresienstadt + Shoah History Department, Jewish Museum in Prague + + + 13. 12. 2013 + + + + + + Terezín/Theresienstadt + COLLECTION.JMP.SHOAH/T + + +

The archive records from the Terezín ghetto (24 November 1941 – 8 May 1945, and + from the period after the liberation) are preserved only in fragments.

+
+ + + + Documents + COLLECTION.JMP.SHOAH/T/2 + + + + Documents from the period of occupation + COLLECTION.JMP.SHOAH/T/2/A + + + + Preparations for implementing the plan for the Final + Solution to the Jewish Question in the Protectorate of + Bohemia and Moravia. + COLLECTION.JMP.SHOAH/T/2/A/1 + + + + Considerations about setting up a ghetto for Jews + in Protectorate of Bohemia and Moravia + COLLECTION.JMP.SHOAH/T/2/A/1a + + + + Statistics relating to Jewish women aged + 20-45 years + COLLECTION.JMP.SHOAH/T/2/A/1a/028 + + + + Statistics relating to Jewish women + aged 20-45 years in the Protectorate + DOCUMENT.JMP.SHOAH/T/2/A/1a/028 + September 21 1941 + + German + + + 2 Folio; 2 Pages + + + + + + Keywords + Women + + + Places + Praha + Brno + + + + + + + + + +
+
\ No newline at end of file From 9a8a58199e153b7377416c761041d2cb4251e07f Mon Sep 17 00:00:00 2001 From: Mike Bryant Date: Fri, 29 Aug 2014 17:46:00 +0100 Subject: [PATCH 3/4] Cleanup --- .../relativize_identifiers/RelativizeIdentifiers.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java b/relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java index d00b718..affd99d 100644 --- a/relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java +++ b/relativize-identifiers/src/main/java/eu/ehri/relativize_identifiers/RelativizeIdentifiers.java @@ -56,10 +56,9 @@ public static String relativizeIdentifiers(String eadfile, Writer outputWriter) XMLEventReader xmlEventReaderEAD = XMLInputFactory.newInstance().createXMLEventReader(fileInputStreamEAD); while (xmlEventReaderEAD.hasNext()) { XMLEvent event = xmlEventReaderEAD.nextEvent(); - + writer.add(event); if (event.isStartElement()) { if (event.asStartElement().getName().getLocalPart().equals("unitid")) { - writer.add(event); XMLEvent nextEvent = xmlEventReaderEAD.nextEvent(); if (nextEvent.isCharacters()) { thisId = nextEvent.asCharacters().getData(); @@ -68,7 +67,6 @@ public static String relativizeIdentifiers(String eadfile, Writer outputWriter) // colons, or dashes. 
String regex = "^" + Pattern.quote(idStack.peek()) + "[\\s\\-:_\\/]*"; String newId = thisId.replaceFirst(regex, ""); - System.out.println("ID: " + thisId + " -> " + newId); Characters chars = eventFactory.createCharacters(newId); writer.add(chars); } else { @@ -76,17 +74,12 @@ public static String relativizeIdentifiers(String eadfile, Writer outputWriter) } idStack.push(thisId); } - } else { - writer.add(event); } } else if (event.isEndElement()) { if (event.asEndElement().getName().getLocalPart() .matches(childPattern.pattern())) { idStack.pop(); } - writer.add(event); - } else { - writer.add(event); } } From 2cbec87008084526874048e8085677cde5de5372 Mon Sep 17 00:00:00 2001 From: Mike Bryant Date: Fri, 29 Aug 2014 17:53:10 +0100 Subject: [PATCH 4/4] Test we don't mangle already relative IDs --- .../RelativizeIdentifiersTest.java | 12 +++++- .../src/test/resources/relativeids.xml | 37 +++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 relativize-identifiers/src/test/resources/relativeids.xml diff --git a/relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java b/relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java index de03833..03ae828 100644 --- a/relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java +++ b/relativize-identifiers/src/test/java/eu/ehri/relativize_identifiers/RelativizeIdentifiersTest.java @@ -38,7 +38,6 @@ public void testRelativizeIdentifiersWithSpaces() throws Exception { assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/did/unitid").evaluate(outDoc)); assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/did/unitid").evaluate(outDoc)); assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/c03/did/unitid").evaluate(outDoc)); - assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/c03/did/unitid").evaluate(outDoc)); assertEquals("2", 
xpath.compile("/ead/archdesc/dsc/c01/c02[2]/did/unitid").evaluate(outDoc)); assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/c03/did/unitid").evaluate(outDoc)); assertEquals("2 root 1 2", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/c03[2]/did/unitid").evaluate(outDoc)); @@ -50,7 +49,6 @@ public void testRelativizeIdentifiersWithHyphens() throws Exception { assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/did/unitid").evaluate(outDoc)); assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/did/unitid").evaluate(outDoc)); assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/c03/did/unitid").evaluate(outDoc)); - assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/c03/did/unitid").evaluate(outDoc)); assertEquals("2", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/did/unitid").evaluate(outDoc)); assertEquals("1", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/c03/did/unitid").evaluate(outDoc)); } @@ -67,6 +65,16 @@ public void testRelativizeIdentifiersWithSlashes() throws Exception { (outDoc)); } + @Test + public void testRelativizeIdentifiersAlreadyRelative() throws Exception { + Document outDoc = getOutputDocument("/relativeids.xml"); + assertEquals("c1", xpath.compile("/ead/archdesc/dsc/c01/did/unitid").evaluate(outDoc)); + assertEquals("c2-1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/did/unitid").evaluate(outDoc)); + assertEquals("c3-1", xpath.compile("/ead/archdesc/dsc/c01/c02[1]/c03/did/unitid").evaluate(outDoc)); + assertEquals("c2-2", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/did/unitid").evaluate(outDoc)); + assertEquals("c3-1", xpath.compile("/ead/archdesc/dsc/c01/c02[2]/c03/did/unitid").evaluate(outDoc)); + } + private Document getOutputDocument(String resourceName) throws URISyntaxException, XMLStreamException, IOException, SAXException { URL resource = RelativizeIdentifiersTest.class.getResource(resourceName); diff --git a/relativize-identifiers/src/test/resources/relativeids.xml 
b/relativize-identifiers/src/test/resources/relativeids.xml new file mode 100644 index 0000000..69187c1 --- /dev/null +++ b/relativize-identifiers/src/test/resources/relativeids.xml @@ -0,0 +1,37 @@ + + + + + + + root + + + + + c1 + + + + c2-1 + + + + c3-1 + + + + + + c2-2 + + + + c3-1 + + + + + + + \ No newline at end of file