From 55432e989faed41441c36e204b23ac1c38ab7af1 Mon Sep 17 00:00:00 2001 From: Artem Kozlov Date: Mon, 1 Apr 2024 21:45:47 +0300 Subject: [PATCH 1/3] Fix association table xpath for custom Literal generators. 1. small refactoring, to remove duplicated code 2. simplify Literal xpath generation for the association table 3. add unit test for association table generation Signed-off-by: Artem Kozlov --- .../ics/isl/x3ml/engine/GeneratorContext.java | 82 ++++------ .../forth/ics/isl/x3ml/engine/XPathInput.java | 52 ------ .../eu/delving/x3ml/TestAssociationTable.java | 73 +++++++++ .../01_date-expected-association-table.xml | 38 +++++ .../01_date-generator-policy.xml | 30 ++++ .../association_table/01_date-input.xml | 13 ++ .../association_table/01_date-mappings.x3ml | 153 ++++++++++++++++++ 7 files changed, 339 insertions(+), 102 deletions(-) create mode 100644 src/test/java/eu/delving/x3ml/TestAssociationTable.java create mode 100644 src/test/resources/association_table/01_date-expected-association-table.xml create mode 100644 src/test/resources/association_table/01_date-generator-policy.xml create mode 100644 src/test/resources/association_table/01_date-input.xml create mode 100644 src/test/resources/association_table/01_date-mappings.x3ml diff --git a/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java b/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java index cc9130e..440bdb3 100644 --- a/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java +++ b/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java @@ -19,6 +19,7 @@ Licensed to the Apache Software Foundation (ASF) under one package gr.forth.ics.isl.x3ml.engine; import gr.forth.ics.isl.x3ml.X3MLEngine; +import gr.forth.ics.isl.x3ml.engine.X3ML.GeneratedType; import org.w3c.dom.Node; import static gr.forth.ics.isl.x3ml.engine.X3ML.ArgValue; import static gr.forth.ics.isl.x3ml.engine.X3ML.Condition; @@ -170,9 +171,7 @@ public ArgValue getArgValue(String name, SourceType 
sourceType, boolean mergeMul }); put(variable_deprecated,VariableScope.WITHIN_MAPPING, generatedValue); context.putGeneratedValue(extractXPath(node) + unique+"-"+variable, generatedValue); - if(X3MLEngine.ENABLE_ASSOCIATION_TABLE){ - this.createAssociationTable(generatedValue, null, extractAssocTableXPath(node)); - } + this.createAssociationTable(generatedValue, generator, node); } }else{ // String nodeName = extractXPath(node) + unique+"-"+typeAwareVar; @@ -197,21 +196,9 @@ public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMul } } }); - GeneratedValue genArg=null; - if(generator.getName().equalsIgnoreCase("Literal")){ - genArg = context.policy().generate(generator, new Generator.ArgValues() { - @Override - public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMultipleValues) { - return context.input().evaluateArgument2(node, index, generator, name, sourceType); - - } - }); - } log.debug("put generated value: {}\t{}", nodeName, generatedValue); context.putGeneratedValue(nodeName, generatedValue); - if(X3MLEngine.ENABLE_ASSOCIATION_TABLE){ - this.createAssociationTable(generatedValue, genArg, extractAssocTableXPath(node)); - } + this.createAssociationTable(generatedValue, generator, node); } } } @@ -254,20 +241,8 @@ public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMul } } }); - GeneratedValue genArg=null; - if(generator.getName().equalsIgnoreCase("Literal")){ - genArg = context.policy().generate(generator, new Generator.ArgValues() { - @Override - public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMultipleValues) { - return context.input().evaluateArgument2(node, index, generator, name, sourceType); - - } - }); - } context.putGeneratedValue(nodeName, generatedValue); - if(X3MLEngine.ENABLE_ASSOCIATION_TABLE){ - this.createAssociationTable(generatedValue, genArg, extractAssocTableXPath(node)); - } + this.createAssociationTable(generatedValue, generator, node); } } 
} @@ -315,16 +290,6 @@ public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMul } } }); - GeneratedValue genArg=null; - if(generator.getName().equalsIgnoreCase("Literal")){ - genArg = context.policy().generate(generator, new Generator.ArgValues() { - @Override - public ArgValue getArgValue(String name, SourceType sourceType, boolean mergeMultipleValues) { - return context.input().evaluateArgument2(node, index, generator, name, sourceType); - - } - }); - } } context.putGeneratedValue(nodeName, generatedValue); @@ -338,21 +303,38 @@ public boolean conditionFails(Condition condition, GeneratorContext context) { return condition != null && condition.failure(context); } - private void createAssociationTable(GeneratedValue generatedValue, GeneratedValue generatedArg, String xpathProper){ - String value=""; - if(generatedValue.type == X3ML.GeneratedType.LITERAL){ - value="\""+generatedValue.text+"\""; + private void createAssociationTable(GeneratedValue generatedValue, GeneratorElement generator, Node node){ + if(X3MLEngine.ENABLE_ASSOCIATION_TABLE) { + String xpathProper = extractAssocTableXPath(node); + + String value=""; + if(generatedValue.type == GeneratedType.LITERAL || generatedValue.type == GeneratedType.TYPED_LITERAL) { + // we assume that there is argument named text for generators that generate Literal or Typed Literals + // and that this argument is of type xpath + String generatedArg = + generator.getArgs() + .stream() + .filter(arg -> SourceType.xpath.name().equals(arg.type)) + .findFirst() + // in case of multiple intermediary elements we re-write xpath to always point to the first one + // because this is a default behaviour of non merging generators + .map(arg -> arg.value.replaceAll("/", "[1]/")) + .orElse(null); + + value="\""+generatedValue.text+"\""; + if(generatedArg != null) + xpathProper+="/"+generatedArg; + else + xpathProper+="/text()"; + } + else if(generatedValue.type == X3ML.GeneratedType.URI) { + 
value=generatedValue.text; + } - if(generatedArg!=null) - xpathProper+="/"+generatedArg.text; - else - xpathProper+="/text()"; - } - else if(generatedValue.type == X3ML.GeneratedType.URI) - value=generatedValue.text; if(xpathProper!=null){ //Needs a little more inspection this AssociationTable.addEntry(xpathProper,value); } + } } /**Adds a new entry in the association table with the given XPATH expression and diff --git a/src/main/java/gr/forth/ics/isl/x3ml/engine/XPathInput.java b/src/main/java/gr/forth/ics/isl/x3ml/engine/XPathInput.java index a046d5c..cf18be3 100644 --- a/src/main/java/gr/forth/ics/isl/x3ml/engine/XPathInput.java +++ b/src/main/java/gr/forth/ics/isl/x3ml/engine/XPathInput.java @@ -151,58 +151,6 @@ public X3ML.ArgValue evaluateArgument(Node node, int index, GeneratorElement gen } return value; } - - public X3ML.ArgValue evaluateArgument2(Node node, int index, GeneratorElement generatorElement, String argName, SourceType defaultType) { - X3ML.GeneratorArg foundArg = null; - SourceType type = defaultType; - if (generatorElement.getArgs() != null) { - for (X3ML.GeneratorArg arg : generatorElement.getArgs()) { - if (arg.name == null) { - arg.name = "text"; - } - if (arg.name.equals(argName)) { - foundArg = arg; - type = sourceType(arg.type, defaultType); - } - } - - } - X3ML.ArgValue value = null; - switch (type) { - - case xpath: - if (foundArg == null) { - return null; - } - String lang = getLanguageFromSource(node); - if (lang == null) { - lang = languageFromMapping; - } - if (!foundArg.value.isEmpty()) { - value = argVal( foundArg.value.replaceAll("/", "[1]/"), lang); - if (value.string.isEmpty()) { - throw exception("Empty result for arg " + foundArg.name + " at node " + node.getNodeName() + " in generator\n" + generatorElement); - } - } - break; - case constant: - if (foundArg == null) { - return null; - } - value = argVal(foundArg.value, languageFromMapping); - break; - case position: - value = argVal(String.valueOf(index), null); - break; - 
case entireInput: - value=argVal(this.getEntireXpathInput(), languageFromMapping); - entireInputExportedRefUri=domainURIForNamedgraps; - break; - default: - throw new RuntimeException("Not implemented"); - } - return value; - } /** Returns the value that can be found in the corresponding node, after the evaluation * of the given XPath expression. More specifically it returns the results after diff --git a/src/test/java/eu/delving/x3ml/TestAssociationTable.java b/src/test/java/eu/delving/x3ml/TestAssociationTable.java new file mode 100644 index 0000000..89a64ad --- /dev/null +++ b/src/test/java/eu/delving/x3ml/TestAssociationTable.java @@ -0,0 +1,73 @@ +/*============================================================================== +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. 
+==============================================================================*/ +package eu.delving.x3ml; + +import static eu.delving.x3ml.AllTests.document; +import static eu.delving.x3ml.AllTests.engine; +import static eu.delving.x3ml.AllTests.policy; +import static eu.delving.x3ml.AllTests.resource; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import org.apache.commons.io.IOUtils; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import gr.forth.ics.isl.x3ml.X3MLEngine; +import gr.forth.ics.isl.x3ml.engine.GeneratorContext; +import gr.forth.ics.isl.x3ml_reverse_utils.AssociationTable; + +public class TestAssociationTable { + + @BeforeClass + public static void setUp() { + X3MLEngine.ENABLE_ASSOCIATION_TABLE = true; + } + + @AfterClass + public static void tearDown() { + // because this flag is a static variable we need to make sure that we reset it + // after we are done with association table tests + X3MLEngine.ENABLE_ASSOCIATION_TABLE = false; + } + + @Before + public void before() { + AssociationTable.clearAssociationTable(); + } + + @Test + public void testCustomLiteralGenerator() throws IOException { + // test to check that proper xpath is generated not only for default Literal + // generator but also for a custom one like DateNormalizer + X3MLEngine engine = engine("/association_table/01_date-mappings.x3ml"); + X3MLEngine.Output output = engine.execute(document("/association_table/01_date-input.xml"), + policy("/association_table/01_date-generator-policy.xml")); + output.close(); + + String expected = IOUtils.toString( + resource("/association_table/01_date-expected-association-table.xml"), + StandardCharsets.UTF_8); + assertEquals(expected, GeneratorContext.exportAssociationTableToString()); + } +} diff --git a/src/test/resources/association_table/01_date-expected-association-table.xml 
b/src/test/resources/association_table/01_date-expected-association-table.xml new file mode 100644 index 0000000..17e0a08 --- /dev/null +++ b/src/test/resources/association_table/01_date-expected-association-table.xml @@ -0,0 +1,38 @@ + + + /record[1] + http://vocab.getty.edu/aat/300133025 + + + /record[1] + https://artresearch.net/resource/frick/work/991013309269707141 + + + /record[1]/controlfield[2] + https://artresearch.net/resource/frick/work/991013309269707141/production + + + /record[1]/controlfield[2] + https://artresearch.net/resource/frick/work/991013309269707141/production/timespan + + + /record[1]/controlfield[2]/substring(substring-after(text(), 'k'), 1, 4) + "1527-12-31T23:59:59" + + + /record[1]/controlfield[2]/substring(substring-after(text(), 'k'), 5, 4) + "1527-01-01T00:00:00" + + + /record[1]/datafield[2]/subfield[1] + https://artresearch.net/resource/frick/work/991013309269707141/production-timespan-appellation/F38A46F2-D8E8-3427-8010-70AA0BAB25FC + + + /record[1]/datafield[2]/subfield[1]/text() + "1527." + + + /record[1]/datafield[@tag="245"][1]/subfield[@code="a"][1]/text() + "Sir Thomas More." 
+ + \ No newline at end of file diff --git a/src/test/resources/association_table/01_date-generator-policy.xml b/src/test/resources/association_table/01_date-generator-policy.xml new file mode 100644 index 0000000..8ab37d4 --- /dev/null +++ b/src/test/resources/association_table/01_date-generator-policy.xml @@ -0,0 +1,30 @@ + + + + {subject_type}/{subject_id}/{identifier_type}/{identifier_id} + + + {type}/{id} + + + + + + + + {type}/{id} + + + {resourceType}/{resourceId}/{eventType} + + + {resourceType}/{resourceId}/{eventType}/timespan + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/association_table/01_date-input.xml b/src/test/resources/association_table/01_date-input.xml new file mode 100644 index 0000000..b15a6d4 --- /dev/null +++ b/src/test/resources/association_table/01_date-input.xml @@ -0,0 +1,13 @@ + + + 13826nkc a2202221 a 4500 + 991013309269707141 + 110428k15271527xx nnn | | cneng|d + + + Sir Thomas More. + + + 1527. + + \ No newline at end of file diff --git a/src/test/resources/association_table/01_date-mappings.x3ml b/src/test/resources/association_table/01_date-mappings.x3ml new file mode 100644 index 0000000..b1236ed --- /dev/null +++ b/src/test/resources/association_table/01_date-mappings.x3ml @@ -0,0 +1,153 @@ + + + + + + + + + + + + + + /record + + + crm:E22_Human-Made_Object + + work + controlfield[@tag="001"] + + + datafield[@tag="245"]/subfield[@code="a"]/text() + + + crm:P2_has_type + + crm:E55_Type + + http://vocab.getty.edu/aat/300133025 + + + + + + + + + + controlfield[@tag="008"] + + + crm:P108i_was_produced_by + + crm:E12_Production + + work + /record/controlfield[@tag="001"] + production + + + crm:P4_has_time-span + + crm:E52_Time-Span + + work + /record/controlfield[@tag="001"] + production + + + crm:P82a_begin_of_the_begin + + + + controlfield[@tag="008"] + + + xsd:dateTime + + Upper + Date_and_Time + substring(substring-after(text(), 'k'), 1, 4) + + + + + + + + + controlfield[@tag="008"] + + + 
crm:P108i_was_produced_by + + crm:E12_Production + + crm:P4_has_time-span + + crm:E52_Time-Span + + crm:P82b_end_of_the_end + + + + controlfield[@tag="008"] + + + xsd:dateTime + + Lower + Date_and_Time + substring(substring-after(text(), 'k'), 5, 4) + + + + + + + + + datafield[@tag="260"]/subfield[@code="c"] + + + crm:P108i_was_produced_by + + crm:E12_Production + + crm:P4_has_time-span + + crm:E52_Time-Span + + crm:P1_is_identified_by + + crm:E41_Appellation + + work + /record/controlfield[@tag="001"] + production-timespan-appellation + text() + + + crm:P190_has_symbolic_content + + + + datafield[@tag="260"]/subfield[@code="c"] + + + rdfs:Literal + + text() + + + + + + + + From c3e5dc6b558e66e4f84d05d02c97b7558a93c9a8 Mon Sep 17 00:00:00 2001 From: Artem Kozlov Date: Tue, 2 Apr 2024 21:18:10 +0300 Subject: [PATCH 2/3] Properly support complex xpath expressions in association table. Support function calls, relative path and indexed access. Update unit test accordingly. Signed-off-by: Artem Kozlov --- .../ics/isl/x3ml/engine/GeneratorContext.java | 32 +++++++++++++++-- .../01_date-expected-association-table.xml | 12 +++++++ .../01_date-generator-policy.xml | 3 ++ .../association_table/01_date-input.xml | 8 +++++ .../association_table/01_date-mappings.x3ml | 34 +++++++++++++++++++ 5 files changed, 86 insertions(+), 3 deletions(-) diff --git a/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java b/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java index 440bdb3..3b49b76 100644 --- a/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java +++ b/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java @@ -33,8 +33,11 @@ Licensed to the Apache Software Foundation (ASF) under one import java.io.OutputStreamWriter; import java.io.Writer; import java.util.ArrayDeque; +import java.util.Arrays; import java.util.Deque; import java.util.HashMap; +import java.util.regex.Pattern; +import java.util.stream.Collectors; import 
org.w3c.dom.Attr; import static gr.forth.ics.isl.x3ml.X3MLEngine.exception; @@ -316,9 +319,7 @@ private void createAssociationTable(GeneratedValue generatedValue, GeneratorElem .stream() .filter(arg -> SourceType.xpath.name().equals(arg.type)) .findFirst() - // in case of multiple intermediary elements we re-write xpath to always point to the first one - // because this is a default behaviour of non merging generators - .map(arg -> arg.value.replaceAll("/", "[1]/")) + .map(arg -> this.rewriteArgXPath(arg.value)) .orElse(null); value="\""+generatedValue.text+"\""; @@ -336,6 +337,31 @@ else if(generatedValue.type == X3ML.GeneratedType.URI) { } } } + + private final Pattern NUMERIC_INDEX_PATTERN = Pattern.compile(".*\\[\\d+\\]$"); + private final Pattern FUNCTION_PATTERN = Pattern.compile(".*\\(.*\\)$"); + + /** + * In case of multiple intermediary elements we re-write xpath to always point to the first one + * because this is a default behaviour of non merging generators + */ + public String rewriteArgXPath(String xpath) { + String[] segments = xpath.split("/"); + + for (int i = 0; i < segments.length; i++) { + String segment = segments[i]; + + // Check if segment is not a function call, not a relative path, + // and does not already contain indexed access + if (!segment.isEmpty() && !segment.equals(".") && !segment.equals("..") + && !NUMERIC_INDEX_PATTERN.matcher(segment).matches() + && !FUNCTION_PATTERN.matcher(segment).matches()) { + segments[i] = segment + "[1]"; + } + } + + return Arrays.stream(segments).collect(Collectors.joining("/")); + } /**Adds a new entry in the association table with the given XPATH expression and * the given key (It is used in the case of joins). 
diff --git a/src/test/resources/association_table/01_date-expected-association-table.xml b/src/test/resources/association_table/01_date-expected-association-table.xml index 17e0a08..80e4570 100644 --- a/src/test/resources/association_table/01_date-expected-association-table.xml +++ b/src/test/resources/association_table/01_date-expected-association-table.xml @@ -31,6 +31,18 @@ /record[1]/datafield[2]/subfield[1]/text() "1527." + + /record[1]/datafield[3]/subfield[1] + https://artresearch.net/resource/frick/type/8E0AC9AA-3D9D-357A-984E-9B1E52105392 + + + /record[1]/datafield[3]/subfield[1] + https://artresearch.net/resource/frick/work/991013309269707141/subject/54F23224-5B7B-3276-8C6D-16F231A0A4BB + + + /record[1]/datafield[3]/subfield[1]/../../datafield[@ind2="7" and @tag="650"][1]/subfield[@code="a"][1]/text() + "Portraits: Men: With hands: With hats: Head to right." + /record[1]/datafield[@tag="245"][1]/subfield[@code="a"][1]/text() "Sir Thomas More." diff --git a/src/test/resources/association_table/01_date-generator-policy.xml b/src/test/resources/association_table/01_date-generator-policy.xml index 8ab37d4..06b8c13 100644 --- a/src/test/resources/association_table/01_date-generator-policy.xml +++ b/src/test/resources/association_table/01_date-generator-policy.xml @@ -27,4 +27,7 @@ + + {resourceType}/{resourceId}/subject/{subject_id} + \ No newline at end of file diff --git a/src/test/resources/association_table/01_date-input.xml b/src/test/resources/association_table/01_date-input.xml index b15a6d4..31bab12 100644 --- a/src/test/resources/association_table/01_date-input.xml +++ b/src/test/resources/association_table/01_date-input.xml @@ -10,4 +10,12 @@ 1527. + + Art, German. + http://id.loc.gov/authorities/subjects/sh85007675 + + + Portraits: Men: With hands: With hats: Head to right. 
+ local + \ No newline at end of file diff --git a/src/test/resources/association_table/01_date-mappings.x3ml b/src/test/resources/association_table/01_date-mappings.x3ml index b1236ed..f6dbf31 100644 --- a/src/test/resources/association_table/01_date-mappings.x3ml +++ b/src/test/resources/association_table/01_date-mappings.x3ml @@ -148,6 +148,40 @@ + + + + datafield[@ind2="0" and @tag="650"]/subfield[@code="a"] + + + crm:P65_shows_visual_item + + crm:E36_Visual_Item + + work + /record/controlfield[@tag="001"] + ../../datafield[@ind2="7" and @tag="650"]/subfield[@code="a"]/text() + + + ../../datafield[@ind2="7" and @tag="650"]/subfield[@code="a"]/text() + + + crm:P2_has_type + + + + datafield[@ind2="0" and @tag="650"]/subfield[@code="a"] + + + custom:noType + + type + text() + + + + + From 49569ffa57ebbefa9dd07204c283f1aae5762635 Mon Sep 17 00:00:00 2001 From: Artem Kozlov Date: Wed, 3 Apr 2024 11:11:52 +0300 Subject: [PATCH 3/3] Association table. Fix complex xpath in function calls. XPath generation was messing with paths inside function calls. With this change we add [1] only to path segments that are not inside function calls. 
Signed-off-by: Artem Kozlov --- .../ics/isl/x3ml/engine/GeneratorContext.java | 52 +++++++++++++++---- .../01_date-expected-association-table.xml | 18 +++++-- .../01_date-generator-policy.xml | 6 +++ .../association_table/01_date-input.xml | 9 ++++ .../association_table/01_date-mappings.x3ml | 40 ++++++++++++++ 5 files changed, 113 insertions(+), 12 deletions(-) diff --git a/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java b/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java index 3b49b76..629536f 100644 --- a/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java +++ b/src/main/java/gr/forth/ics/isl/x3ml/engine/GeneratorContext.java @@ -33,11 +33,11 @@ Licensed to the Apache Software Foundation (ASF) under one import java.io.OutputStreamWriter; import java.io.Writer; import java.util.ArrayDeque; -import java.util.Arrays; +import java.util.ArrayList; import java.util.Deque; import java.util.HashMap; +import java.util.List; import java.util.regex.Pattern; -import java.util.stream.Collectors; import org.w3c.dom.Attr; import static gr.forth.ics.isl.x3ml.X3MLEngine.exception; @@ -346,21 +346,55 @@ else if(generatedValue.type == X3ML.GeneratedType.URI) { * because this is a default behaviour of non merging generators */ public String rewriteArgXPath(String xpath) { - String[] segments = xpath.split("/"); + // because we need to add [1] to every tag without index, + // but at the same time must not mess up function calls, we are splitting xpath on "/" + // but doing this only if "/" is not inside a function call or attribute access + List segments = new ArrayList<>(); + int lastSegmentStart = 0; + int bracketDepth = 0; + int parenthesisDepth = 0; - for (int i = 0; i < segments.length; i++) { - String segment = segments[i]; + for (int i = 0; i < xpath.length(); i++) { + char ch = xpath.charAt(i); + if (ch == '[') { + bracketDepth++; + } else if (ch == ']') { + bracketDepth--; + } else if (ch == '(') { + parenthesisDepth++; + } else if 
(ch == ')') { + parenthesisDepth--; + } else if (ch == '/' && bracketDepth == 0 && parenthesisDepth == 0 && i != 0) { + // we are not inside function call or attribute access - // Check if segment is not a function call, not a relative path, - // and does not already contain indexed access + // Check for double slash + if (i + 1 < xpath.length() && xpath.charAt(i + 1) == '/') { + i++; // Skip the next slash + } + + // If i is not 0, add the substring excluding the slash + if (i != 0) { + segments.add(xpath.substring(lastSegmentStart, i)); + } + lastSegmentStart = i + 1; // Move past the slash for the start of the next segment + } + } + + segments.add(xpath.substring(lastSegmentStart)); // Add the last segment + + for (int i = 0; i < segments.size(); i++) { + String segment = segments.get(i); + // Check if segment is not a function call, not a relative path, + // and does not already contain indexed access if (!segment.isEmpty() && !segment.equals(".") && !segment.equals("..") && !NUMERIC_INDEX_PATTERN.matcher(segment).matches() && !FUNCTION_PATTERN.matcher(segment).matches()) { - segments[i] = segment + "[1]"; + segments.set(i, segment + "[1]"); } } - return Arrays.stream(segments).collect(Collectors.joining("/")); + // re-construct xpath + return String.join("/", segments); } /**Adds a new entry in the association table with the given XPATH expression and diff --git a/src/test/resources/association_table/01_date-expected-association-table.xml b/src/test/resources/association_table/01_date-expected-association-table.xml index 80e4570..01b3c4f 100644 --- a/src/test/resources/association_table/01_date-expected-association-table.xml +++ b/src/test/resources/association_table/01_date-expected-association-table.xml @@ -32,15 +32,27 @@ "1527." 
- /record[1]/datafield[3]/subfield[1] + /record[1]/datafield[3]/subfield[5] + https://artresearch.net/resource/frick/type/5B02D2E1-391B-3BF8-9813-758D731C7183 + + + /record[1]/datafield[3]/subfield[5] + https://artresearch.net/resource/frick/work/991013309269707141/acquisition/94D0DF61-CB3E-3D02-B669-4A6F2A4F189F + + + /record[1]/datafield[3]/subfield[5]/string-join((../subfield[@code="a"]/text(),"-"),' ') + "Location: Frick Collection, - Acquisition: Henry Clay Frick Bequest, - -" + + + /record[1]/datafield[4]/subfield[1] https://artresearch.net/resource/frick/type/8E0AC9AA-3D9D-357A-984E-9B1E52105392 - /record[1]/datafield[3]/subfield[1] + /record[1]/datafield[4]/subfield[1] https://artresearch.net/resource/frick/work/991013309269707141/subject/54F23224-5B7B-3276-8C6D-16F231A0A4BB - /record[1]/datafield[3]/subfield[1]/../../datafield[@ind2="7" and @tag="650"][1]/subfield[@code="a"][1]/text() + /record[1]/datafield[4]/subfield[1]/../../datafield[@ind2="7" and @tag="650"][1]/subfield[@code="a"][1]/text() "Portraits: Men: With hands: With hats: Head to right." diff --git a/src/test/resources/association_table/01_date-generator-policy.xml b/src/test/resources/association_table/01_date-generator-policy.xml index 06b8c13..e58274b 100644 --- a/src/test/resources/association_table/01_date-generator-policy.xml +++ b/src/test/resources/association_table/01_date-generator-policy.xml @@ -30,4 +30,10 @@ {resourceType}/{resourceId}/subject/{subject_id} + + {resourceType}/{resourceId}/{eventType}/{from}{to}{timespan} + + + Location: {To} Acquisition: {Type} {Date} + \ No newline at end of file diff --git a/src/test/resources/association_table/01_date-input.xml b/src/test/resources/association_table/01_date-input.xml index 31bab12..e7c6c67 100644 --- a/src/test/resources/association_table/01_date-input.xml +++ b/src/test/resources/association_table/01_date-input.xml @@ -10,6 +10,15 @@ 1527. 
+ + Frick Collection, + New York, + New York, + United States, + Henry Clay Frick Bequest, + 1912.1.77, + public. + Art, German. http://id.loc.gov/authorities/subjects/sh85007675 diff --git a/src/test/resources/association_table/01_date-mappings.x3ml b/src/test/resources/association_table/01_date-mappings.x3ml index f6dbf31..773d6fb 100644 --- a/src/test/resources/association_table/01_date-mappings.x3ml +++ b/src/test/resources/association_table/01_date-mappings.x3ml @@ -10,6 +10,7 @@ + @@ -182,6 +183,45 @@ + + + + datafield[@tag="590" and @ind1="1"]/subfield[@code="e"] + + + crm:P24i_changed_ownership_through + + crm:E8_Acquisition + + work + /record/controlfield[@tag="001"] + acquisition + - + ../subfield[@code="a"]/text() + - + + + string-join((../subfield[@code="a"]/text(),"-"),' ') + string-join((text(),"-"),' ') + string-join((../subfield[@code="l"]/text(),"-"),' ') + + + crm:P2_has_type + + + + datafield[@tag="590" and @ind1="1"]/subfield[@code="e"] + + + custom:noType + + type + text() + + + + +