From e864155d48500a1241df6de2246f337ef7865e79 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Tue, 3 Mar 2026 11:49:21 +0100 Subject: [PATCH 01/63] Remove SparqlUpdateTemplatingEngine and have its implementations (SparqlUpdateTemplatingEngineSimple and SparqlTemplatingEngineVelocity) implement TemplateEngine instead, aligning with the existing plugin pattern used by JinjaEngine, DisabledTemplateEngine, --- .../plugins/dataset/rdf/RdfPlugins.scala | 5 ++- .../rdf/tasks/SparqlUpdateCustomTask.scala | 14 ++++---- .../SparqlTemplatingEngineVelocity.scala | 36 ++++++++++++++++--- .../SparqlUpdateTemplatingEngine.scala | 28 --------------- .../SparqlUpdateTemplatingEngineSimple.scala | 35 +++++++++++++++--- ...arqlUpdateTemplatingEngineSimpleTest.scala | 6 ++-- .../SparqlTemplatingEngineVelocityTest.scala | 12 +++---- 7 files changed, 83 insertions(+), 53 deletions(-) delete mode 100644 silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngine.scala diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala index 9342e83304..5d0f32aa67 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala @@ -3,6 +3,7 @@ package org.silkframework.plugins.dataset.rdf import org.silkframework.plugins.dataset.rdf.datasets.{AlignmentDataset, InMemoryDataset, RdfFileDataset, SparqlDataset} import org.silkframework.plugins.dataset.rdf.executors.{LocalSparqlCopyExecutor, LocalSparqlSelectExecutor, LocalSparqlUpdateExecutor} import org.silkframework.plugins.dataset.rdf.tasks.{SparqlCopyCustomTask, SparqlSelectCustomTask, SparqlUpdateCustomTask} +import 
org.silkframework.plugins.dataset.rdf.tasks.templating.{SparqlSimpleTemplateEngine, SparqlVelocityTemplateEngine} import org.silkframework.plugins.dataset.rdf.vocab.{InMemoryVocabularyManager, RdfFilesVocabularyManager, RdfProjectFilesVocabularyManager, RdfVocabularyManager} import org.silkframework.runtime.plugin.{AnyPlugin, PluginModule} @@ -20,7 +21,9 @@ class RdfPlugins extends PluginModule { classOf[InMemoryVocabularyManager], classOf[SparqlSelectCustomTask], classOf[SparqlCopyCustomTask], - classOf[SparqlUpdateCustomTask] + classOf[SparqlUpdateCustomTask], + classOf[SparqlSimpleTemplateEngine], + classOf[SparqlVelocityTemplateEngine] ) ++ executors val executors = Seq( diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index 9207817944..ef2e175fa6 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -35,16 +35,16 @@ case class SparqlUpdateCustomTask( ) extends CustomTask { assert(batchSize >= 1, "Batch size must be greater zero!") - val templatingEngine: SparqlUpdateTemplatingEngine = templatingMode match { - case SparqlUpdateTemplatingMode.simple => SparqlUpdateTemplatingEngineSimple(sparqlUpdateTemplate.str, batchSize) - case SparqlUpdateTemplatingMode.velocity => SparqlTemplatingEngineVelocity(sparqlUpdateTemplate.str, batchSize) + val compiledTemplate: SparqlCompiledTemplate = templatingMode match { + case SparqlUpdateTemplatingMode.simple => SparqlSimpleTemplateEngine().compile(sparqlUpdateTemplate.str) + case SparqlUpdateTemplatingMode.velocity => SparqlVelocityTemplateEngine().compile(sparqlUpdateTemplate.str) } - templatingEngine.validate() + 
compiledTemplate.validate(batchSize) - def isStaticTemplate: Boolean = templatingEngine.isStaticTemplate + def isStaticTemplate: Boolean = compiledTemplate.isStaticTemplate - def expectedInputSchema: EntitySchema = templatingEngine.inputSchema + def expectedInputSchema: EntitySchema = compiledTemplate.inputSchema /** * Generates The SPARQL Update query based on the placeholder assignments. @@ -52,7 +52,7 @@ case class SparqlUpdateCustomTask( * @return */ def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { - templatingEngine.generate(placeholderAssignments, taskProperties) + compiledTemplate.generate(placeholderAssignments, taskProperties) } override def inputPorts: InputPorts = { diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala index cc6a797589..98f9ddb56e 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala @@ -1,24 +1,43 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating + import org.apache.jena.update.UpdateFactory import org.apache.velocity.runtime.parser.node._ import org.silkframework.entity.EntitySchema import org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable +import org.silkframework.runtime.plugin.annotations.Plugin +import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateVariableValue} import org.silkframework.runtime.validation.ValidationException +import java.io.Writer import scala.util.{Failure, Success, Try} /** * A 
SPARQL Update templating engine based on Velocity. */ -case class SparqlTemplatingEngineVelocity(sparqlUpdateTemplate: String, batchSize: Int) extends SparqlUpdateTemplatingEngine { +@Plugin( + id = "sparqlVelocity", + label = "SPARQL Velocity", + description = "A SPARQL Update templating engine based on Apache Velocity." +) +case class SparqlVelocityTemplateEngine() extends TemplateEngine { + + override def compile(templateString: String): SparqlVelocityCompiledTemplate = { + new SparqlVelocityCompiledTemplate(templateString) + } +} + +/** + * A compiled SPARQL Update template based on Velocity. + */ +class SparqlVelocityCompiledTemplate(val sparqlUpdateTemplate: String) extends SparqlCompiledTemplate { private val sparqlTemplate = SparqlVelocityTemplating.createTemplate(sparqlUpdateTemplate) override def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { SparqlVelocityTemplating.renderTemplate(sparqlTemplate, Row(placeholderAssignments), taskProperties) } - override def validate(): Unit = { + override def validate(batchSize: Int): Unit = { // We cannot generate meaningful example values for the template if $row.rawUnsafe() is used, because it could generate arbitrary SPARQL syntax. 
if(!usesRawUnsafe()) { // Generate example input assignments @@ -111,8 +130,6 @@ case class SparqlTemplatingEngineVelocity(sparqlUpdateTemplate: String, batchSiz } } - case class TemplateVariableMethodUsage(rowMethod: String, parameterValue: String) - private def astReferenceName(node: Node): Option[String] = { node match { case reference: ASTReference => @@ -129,9 +146,20 @@ case class SparqlTemplatingEngineVelocity(sparqlUpdateTemplate: String, batchSiz childPaths.fold(List.empty[TemplateVariableMethodUsage])((a, b) => a ::: b) } + override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { + val stringValues = values.map { case (k, v) => k -> String.valueOf(v) } + writer.write(generate(stringValues, TaskProperties(Map.empty, Map.empty))) + } + + override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { + evaluate(convertValues(values), writer) + } + override def isStaticTemplate: Boolean = { SparqlVelocityTemplating.templatingVariables.forall { variableName => variableMethodUsages(variableName).isEmpty } } } + +case class TemplateVariableMethodUsage(rowMethod: String, parameterValue: String) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngine.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngine.scala deleted file mode 100644 index 55b8b20d3f..0000000000 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngine.scala +++ /dev/null @@ -1,28 +0,0 @@ -package org.silkframework.plugins.dataset.rdf.tasks.templating - -import org.silkframework.entity.EntitySchema - -/** - * Trait that every SPARQL Update templating engine must implement. - */ -trait SparqlUpdateTemplatingEngine { - /** - * Renders the template based on the variable assignments. 
- */ - def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String - - /** Validates the template */ - def validate(): Unit - - /** The input entity schema that is expected by the template. */ - def inputSchema: EntitySchema - - /** The SPARQL Update template that will be rendered on every generate call */ - def sparqlUpdateTemplate: String - - /** True is the given template is static, i.e. contains no placeholder variables */ - def isStaticTemplate: Boolean -} - -/** Makes properties of the input and output task of a SPARQL Update operator execution available. */ -case class TaskProperties(inputTask: Map[String, String], outputTask: Map[String, String]) \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala index f5f3e373b9..15814fd51e 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala @@ -6,23 +6,41 @@ import org.silkframework.entity.EntitySchema import org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable import org.silkframework.rule.util.JenaSerializationUtil +import org.silkframework.runtime.plugin.annotations.Plugin +import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateVariableValue} import org.silkframework.runtime.validation.ValidationException import org.silkframework.util.Uri +import java.io.Writer import scala.collection.mutable.ArrayBuffer import scala.util.Try import scala.util.matching.Regex +/** + * A 
simple SPARQL Update templating engine that supports plain literal and URI placeholders. + */ +@Plugin( + id = "sparqlSimple", + label = "Simple SPARQL", + description = "A simple SPARQL Update templating engine that supports plain literal and URI placeholders." +) +case class SparqlSimpleTemplateEngine() extends TemplateEngine { + + override def compile(templateString: String): SparqlSimpleCompiledTemplate = { + new SparqlSimpleCompiledTemplate(templateString) + } +} /** - * A simple templating engine that can only render plain literals and URIs. + * A compiled simple SPARQL Update template that can only render plain literals and URIs. * Example: * * DELETE DATA { ${} rdf:label ${"PROP_FROM_ENTITY_SCHEMA2"} } */ -case class SparqlUpdateTemplatingEngineSimple(sparqlUpdateTemplate: String, batchSize: Int) extends SparqlUpdateTemplatingEngine { +class SparqlSimpleCompiledTemplate(val sparqlUpdateTemplate: String) extends SparqlCompiledTemplate { + /** Validate the generated SPARQL of the template and check for batch execution characteristics */ - override def validate(): Unit = { + override def validate(batchSize: Int): Unit = { val sparql = (sparqlUpdateTemplateParts map { case SparqlUpdateTemplatePlainLiteralPlaceholder(prop) => validateUri(prop) @@ -69,6 +87,15 @@ case class SparqlUpdateTemplatingEngineSimple(sparqlUpdateTemplate: String, batc }).mkString } + override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { + val stringValues = values.map { case (k, v) => k -> String.valueOf(v) } + writer.write(generate(stringValues, TaskProperties(Map.empty, Map.empty))) + } + + override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { + evaluate(convertValues(values), writer) + } + private def validateUri(uri: String): Unit = { Uri(uri).toURI.failed.toOption foreach { failure => throw new ValidationException(s"URI $uri used in SPARQL Update template is not a valid URI (relative or 
absolute)", failure) @@ -170,4 +197,4 @@ case class SparqlUpdateTemplateURIPlaceholder(prop: String) extends SparqlUpdate case class SparqlUpdateTemplatePlainLiteralPlaceholder(prop: String) extends SparqlUpdateTemplatePlaceholder /** Static SPARQL update query part */ -case class SparqlUpdateTemplateStaticPart(queryPart: String) extends SparqlUpdateTemplatePart \ No newline at end of file +case class SparqlUpdateTemplateStaticPart(queryPart: String) extends SparqlUpdateTemplatePart diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala index 8dcbd85e13..5b9c3847d5 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala @@ -82,8 +82,8 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { } def parse(sparqlUpdateTemplate: String, batchSize: Int = 2): Seq[SparqlUpdateTemplatePart] = { - val engine = SparqlUpdateTemplatingEngineSimple(sparqlUpdateTemplate, batchSize) - engine.validate() - engine.sparqlUpdateTemplateParts + val compiled = SparqlSimpleTemplateEngine().compile(sparqlUpdateTemplate) + compiled.validate(batchSize) + compiled.sparqlUpdateTemplateParts } } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala index 2ebc58ba0a..28fd3f3cf7 100644 --- 
a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala @@ -1,8 +1,8 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating import org.apache.jena.vocabulary.XSD -import org.silkframework.runtime.validation.ValidationException -import org.scalatest.flatspec.AnyFlatSpec +import org.silkframework.runtime.validation.ValidationException +import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { @@ -23,8 +23,8 @@ class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { | Raw: $row.rawUnsafe("trustedValuePath") |#end |""".stripMargin - val engine = SparqlTemplatingEngineVelocity(templateString, 1) - engine.inputPaths().sorted mustBe Seq("somePath", "subject", "trustedValuePath") + val compiled = SparqlVelocityTemplateEngine().compile(templateString) + compiled.inputPaths().sorted mustBe Seq("somePath", "subject", "trustedValuePath") } private val templateWithLogic = s"""PREFIX xsd: <${XSD.getURI}> @@ -38,7 +38,7 @@ class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { it should "validate without problems for valid templates" in { validate(sparqlUpdateTemplate) - SparqlTemplatingEngineVelocity(templateWithLogic, 1).inputPaths().sorted mustBe Seq("input1", "input2") + SparqlVelocityTemplateEngine().compile(templateWithLogic).inputPaths().sorted mustBe Seq("input1", "input2") validate(templateWithLogic) } @@ -72,6 +72,6 @@ class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { } def validate(template: String, batchSize: Int = 2): Unit = { - SparqlTemplatingEngineVelocity(template, batchSize).validate() + 
SparqlVelocityTemplateEngine().compile(template).validate(batchSize) } } From 44ca94c2adef7b39767857dceafbd0158bc42042 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Tue, 3 Mar 2026 11:56:08 +0100 Subject: [PATCH 02/63] Add SparqlCompiledTemplate --- .../templating/SparqlCompiledTemplate.scala | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala new file mode 100644 index 0000000000..7faf7ffac1 --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala @@ -0,0 +1,25 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.silkframework.entity.EntitySchema +import org.silkframework.runtime.templating.CompiledTemplate + +/** + * Extension of CompiledTemplate with SPARQL Update specific capabilities. + */ +trait SparqlCompiledTemplate extends CompiledTemplate { + + /** Renders the template based on the variable assignments. */ + def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String + + /** Validates the template, including batch validation if batchSize > 1. */ + def validate(batchSize: Int): Unit + + /** The input entity schema that is expected by the template. */ + def inputSchema: EntitySchema + + /** True if the given template is static, i.e. contains no placeholder variables. */ + def isStaticTemplate: Boolean +} + +/** Makes properties of the input and output task of a SPARQL Update operator execution available. 
*/ +case class TaskProperties(inputTask: Map[String, String], outputTask: Map[String, String]) From 2379b9ccf05b7375cc57fff36faced36a39073bc Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Tue, 3 Mar 2026 14:01:28 +0100 Subject: [PATCH 03/63] Refactoring of TemplateEngines --- .../runtime/templating/TemplateEngines.scala | 2 +- .../rdf/tasks/SparqlUpdateCustomTask.scala | 17 +- .../templating/SparqlCompiledTemplate.scala | 105 +++++++++++- .../SparqlTemplatingEngineVelocity.scala | 103 ++++-------- .../SparqlUpdateTemplatingEngineSimple.scala | 77 +++------ .../SparqlUpdateTemplatingMode.java | 27 --- .../rdf/LocalSparqlUpdateExecutorTest.scala | 14 +- ...arqlUpdateTemplatingEngineSimpleTest.scala | 2 +- .../SparqlTemplatingEngineVelocityTest.scala | 155 +++++++++--------- 9 files changed, 249 insertions(+), 253 deletions(-) delete mode 100644 silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingMode.java diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala index 78c2830d55..d0fd501d44 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala @@ -21,7 +21,7 @@ object TemplateEngines { */ def create(id: String): TemplateEngine = { implicit val pluginContext: PluginContext = PluginContext.empty - PluginRegistry.create[TemplateEngine](id.toLowerCase) + PluginRegistry.create[TemplateEngine](id) } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index ef2e175fa6..f2aa211c76 100644 --- 
a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -6,6 +6,7 @@ import org.silkframework.execution.typed.SparqlUpdateEntitySchema import org.silkframework.plugins.dataset.rdf.tasks.templating._ import org.silkframework.runtime.plugin.annotations.{Param, Plugin} import org.silkframework.runtime.plugin.types.SparqlCodeParameter +import org.silkframework.runtime.templating.{TemplateEngineAutocompletionProvider, TemplateEngines} @Plugin( id = "sparqlUpdateOperator", @@ -20,7 +21,7 @@ case class SparqlUpdateCustomTask( @Param( label = "SPARQL update query", value = "The SPARQL UPDATE template for constructing SPARQL UPDATE queries for every entity from the input." + - " The possible values for the template engine are `Simple` and `Velocity Engine`." + + " The possible values for the template engine are `Simple`, `Velocity Engine` and `Jinja`." + " See the general documentation of this plugin for further details on the features of each template engine.", example = "DELETE DATA { ${} rdf:label ${\"PROP_FROM_ENTITY_SCHEMA2\"} }" ) @@ -28,16 +29,16 @@ case class SparqlUpdateCustomTask( @Param(label = "Batch size", value = "How many entities should be handled in a single update request.") batchSize: Int = SparqlUpdateCustomTask.defaultBatchSize, @Param( - "The templating mode for the template engine. The possible values are `Simple` and `Velocity Engine`." + - " See the general documentation of this plugin for further details on the features of each template engine.", + value = "The templating mode for the template engine. 
See the general documentation of this plugin for further details on the features of each template engine.", + autoCompletionProvider = classOf[TemplateEngineAutocompletionProvider] ) - templatingMode: SparqlUpdateTemplatingMode = SparqlUpdateTemplatingMode.simple + language: String = SparqlSimpleTemplateEngine.id ) extends CustomTask { assert(batchSize >= 1, "Batch size must be greater zero!") - val compiledTemplate: SparqlCompiledTemplate = templatingMode match { - case SparqlUpdateTemplatingMode.simple => SparqlSimpleTemplateEngine().compile(sparqlUpdateTemplate.str) - case SparqlUpdateTemplatingMode.velocity => SparqlVelocityTemplateEngine().compile(sparqlUpdateTemplate.str) + val compiledTemplate: SparqlCompiledTemplate = { + val templateEngine = TemplateEngines.create(language) + new SparqlCompiledTemplate(templateEngine.compile(sparqlUpdateTemplate.str)) } compiledTemplate.validate(batchSize) @@ -68,4 +69,4 @@ case class SparqlUpdateCustomTask( object SparqlUpdateCustomTask { final val defaultBatchSize = 1 -} \ No newline at end of file +} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala index 7faf7ffac1..73f7ab2958 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala @@ -1,24 +1,115 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating +import org.apache.jena.update.UpdateFactory import org.silkframework.entity.EntitySchema -import org.silkframework.runtime.templating.CompiledTemplate +import org.silkframework.entity.paths.UntypedPath +import org.silkframework.execution.local.EmptyEntityTable +import 
org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateVariableValue} +import org.silkframework.runtime.validation.ValidationException + +import java.io.{StringWriter, Writer} +import scala.collection.JavaConverters._ +import scala.util.{Failure, Success, Try} /** - * Extension of CompiledTemplate with SPARQL Update specific capabilities. + * Wraps a [[CompiledTemplate]] and adds SPARQL Update specific capabilities. */ -trait SparqlCompiledTemplate extends CompiledTemplate { +class SparqlCompiledTemplate(delegate: CompiledTemplate) extends CompiledTemplate { /** Renders the template based on the variable assignments. */ - def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String + def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { + val values = new java.util.LinkedHashMap[String, AnyRef]() + placeholderAssignments.foreach { case (k, v) => values.put(k, v) } + if (taskProperties.inputTask.nonEmpty) { + values.put("inputProperties", taskProperties.inputTask.asJava) + } + if (taskProperties.outputTask.nonEmpty) { + values.put("outputProperties", taskProperties.outputTask.asJava) + } + val writer = new StringWriter() + delegate.evaluate(values.asScala.toMap, writer) + writer.toString + } /** Validates the template, including batch validation if batchSize > 1. */ - def validate(batchSize: Int): Unit + def validate(batchSize: Int): Unit = { + // Skip validation if variables cannot be determined (e.g. 
Velocity templates using rawUnsafe) + delegate.variables match { + case None => return + case Some(_) => + } + val genericUri = "urn:generic:1" + val entityVariables = entityVariableNames + val assignments = entityVariables.map(_ -> genericUri).toMap + val inputPropVars = taskPropertyVariableNames("inputProperties").map(_ -> genericUri).toMap + val outputPropVars = taskPropertyVariableNames("outputProperties").map(_ -> genericUri).toMap + val taskProps = TaskProperties(inputPropVars, outputPropVars) + val sparqlQuery = Try(generate(assignments, taskProps)) match { + case Failure(exception) => + throw new ValidationException( + "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) + case Success(value) => value + } + Try(UpdateFactory.create(sparqlQuery)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + + parseError.getMessage + ", example query: " + sparqlQuery) + } + if (batchSize > 1) { + val batchSparql = sparqlQuery + "\n" + sparqlQuery + Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + + parseError.getMessage + ", example batch query: " + batchSparql) + } + } + } /** The input entity schema that is expected by the template. */ - def inputSchema: EntitySchema + def inputSchema: EntitySchema = { + val properties = entityVariableNames + if (properties.isEmpty) { + EmptyEntityTable.schema + } else { + EntitySchema("", properties.map(p => UntypedPath(p).asUntypedValueType).toIndexedSeq) + } + } /** True if the given template is static, i.e. contains no placeholder variables. 
*/ - def isStaticTemplate: Boolean + def isStaticTemplate: Boolean = { + delegate.variables match { + case Some(vars) => vars.isEmpty + case None => false + } + } + + override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { + delegate.evaluate(values, writer) + } + + override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { + delegate.evaluate(values, writer, evaluationConfig) + } + + /** Returns entity variable names (those with empty scope). */ + private def entityVariableNames: Seq[String] = { + delegate.variables match { + case Some(vars) => + vars.filter(_.scope.isEmpty).map(_.name).distinct + case None => + Seq.empty + } + } + + /** Returns variable names for a specific scope (e.g. "inputProperties", "outputProperties"). */ + private def taskPropertyVariableNames(scope: String): Seq[String] = { + delegate.variables match { + case Some(vars) => + vars.filter(_.scope == scope).map(_.name).distinct + case None => + Seq.empty + } + } } /** Makes properties of the input and output task of a SPARQL Update operator execution available. 
*/ diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala index 98f9ddb56e..33b5cfab7a 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala @@ -1,22 +1,17 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating -import org.apache.jena.update.UpdateFactory import org.apache.velocity.runtime.parser.node._ -import org.silkframework.entity.EntitySchema -import org.silkframework.entity.paths.UntypedPath -import org.silkframework.execution.local.EmptyEntityTable import org.silkframework.runtime.plugin.annotations.Plugin -import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateVariableValue} -import org.silkframework.runtime.validation.ValidationException +import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateVariableName, TemplateVariableValue} import java.io.Writer -import scala.util.{Failure, Success, Try} +import scala.collection.JavaConverters._ /** * A SPARQL Update templating engine based on Velocity. */ @Plugin( - id = "sparqlVelocity", + id = SparqlVelocityTemplateEngine.id, label = "SPARQL Velocity", description = "A SPARQL Update templating engine based on Apache Velocity." ) @@ -27,64 +22,54 @@ case class SparqlVelocityTemplateEngine() extends TemplateEngine { } } +object SparqlVelocityTemplateEngine { + final val id = "sparqlVelocity" +} + /** * A compiled SPARQL Update template based on Velocity. 
*/ -class SparqlVelocityCompiledTemplate(val sparqlUpdateTemplate: String) extends SparqlCompiledTemplate { +class SparqlVelocityCompiledTemplate(val sparqlUpdateTemplate: String) extends CompiledTemplate { private val sparqlTemplate = SparqlVelocityTemplating.createTemplate(sparqlUpdateTemplate) - override def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { - SparqlVelocityTemplating.renderTemplate(sparqlTemplate, Row(placeholderAssignments), taskProperties) - } - - override def validate(batchSize: Int): Unit = { - // We cannot generate meaningful example values for the template if $row.rawUnsafe() is used, because it could generate arbitrary SPARQL syntax. - if(!usesRawUnsafe()) { - // Generate example input assignments - val genericUri = "urn:generic:1" // Valid URI string is valid in URI and literal position, so use always the same URI - val assignments = inputPaths().map(p => (p, genericUri)).toMap - val inputPropertiesAssignments = variableMethodUsages(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME).map(_.parameterValue -> genericUri).toMap - val outputPropertiesAssignments = variableMethodUsages(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME).map(_.parameterValue -> genericUri).toMap - // Generate SPARQL Update query with example assignments - val sparqlQuery = Try(generate(assignments, TaskProperties(inputPropertiesAssignments, outputPropertiesAssignments))) match { - case Failure(exception) => - throw new ValidationException("The SPARQL Update template could not be rendered with example value. Error message: " + exception.getMessage, exception) - case Success(value) => value - } - // Validate generated SPARQL Update query - Try(UpdateFactory.create(sparqlQuery)).failed.toOption foreach { parseError => - throw new ValidationException("The SPARQL Update template does not generate valid SPARQL Update queries. 
Error message: " + - parseError.getMessage + ", example query: " + sparqlQuery) - } - // If queries should be batched, also check if queries can be batched, i.e. concatenated and still have valid syntax - if (batchSize > 1) { - val batchSparql = sparqlQuery + "\n" + sparqlQuery - Try(UpdateFactory.create(batchSparql)).failed.toOption foreach { parseError => - throw new ValidationException("The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + - parseError.getMessage + ", example batch query: " + batchSparql) - } - } + override lazy val variables: Option[Seq[TemplateVariableName]] = { + if (usesRawUnsafe()) { + None + } else { + val rowVars = variableMethodUsages(SparqlVelocityTemplating.ROW_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, "")) + val inputPropVars = variableMethodUsages(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, "inputProperties")) + val outputPropVars = variableMethodUsages(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, "outputProperties")) + Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) } } - override def inputSchema: EntitySchema = { - val properties = inputPaths() - if (properties.isEmpty) { - EmptyEntityTable.schema // Static template, no input data needed - } else { - EntitySchema("", properties.map(p => UntypedPath(p).asUntypedValueType).toIndexedSeq) + override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { + // Separate entity variables from task property variables + val entityVars = scala.collection.mutable.Map[String, String]() + var inputProps = Map.empty[String, String] + var outputProps = Map.empty[String, String] + values.foreach { + case ("inputProperties", m: java.util.Map[_, _]) => + inputProps = m.asScala.map { case (k, v) => String.valueOf(k) -> String.valueOf(v) }.toMap + case 
("outputProperties", m: java.util.Map[_, _]) => + outputProps = m.asScala.map { case (k, v) => String.valueOf(k) -> String.valueOf(v) }.toMap + case (k, v) => + entityVars(k) = String.valueOf(v) } + writer.write(SparqlVelocityTemplating.renderTemplate(sparqlTemplate, Row(entityVars.toMap), TaskProperties(inputProps, outputProps))) } - def inputPaths(): Seq[String] = { - variableMethodUsages(SparqlVelocityTemplating.ROW_VAR_NAME).map(_.parameterValue).distinct + override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { + evaluate(convertValues(values), writer) } // Extracts all method invocations on the given variable name in the config - def variableMethodUsages(variableName: String): Seq[TemplateVariableMethodUsage] = { + private[templating] def variableMethodUsages(variableName: String): Seq[TemplateVariableMethodUsage] = { sparqlTemplate.getData match { case simpleNode: SimpleNode => - // This should always be the case retrieveRowMethodUsages(simpleNode, variableName) case None => throw new RuntimeException(s"Unexpected error: Cannot retrieve $variableName object method usages from Velocity template.") @@ -99,7 +84,7 @@ class SparqlVelocityCompiledTemplate(val sparqlUpdateTemplate: String) extends S private val rawUnsafeMethodName = "rawUnsafe" - final val rowMethodsWithPathParameter = Set("uri", "plainLiteral", rawUnsafeMethodName, "exists") + private final val rowMethodsWithPathParameter = Set("uri", "plainLiteral", rawUnsafeMethodName, "exists") /** Retrieves the input paths that are used via the [[Row]] API. 
*/ private def retrieveRowMethodUsages(simpleNode: Node, varName: String): List[TemplateVariableMethodUsage] = { simpleNode match { @@ -108,7 +93,6 @@ class SparqlVelocityCompiledTemplate(val sparqlUpdateTemplate: String) extends S case Some(v) if v == varName && rowMethodsWithPathParameter.contains(astMethod.getMethodName) && validStringRowMethodParameter(astMethod) => - // Collect parameter values from the specified methods of the 'row' object, since only these must all be input paths. val parameterValue = astMethod.jjtGetChild(1).jjtGetChild(0).asInstanceOf[ASTStringLiteral].literal().stripPrefix("\"").stripSuffix("\"") List(TemplateVariableMethodUsage(astMethod.getMethodName, parameterValue)) case _ => @@ -145,21 +129,6 @@ class SparqlVelocityCompiledTemplate(val sparqlUpdateTemplate: String) extends S } childPaths.fold(List.empty[TemplateVariableMethodUsage])((a, b) => a ::: b) } - - override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { - val stringValues = values.map { case (k, v) => k -> String.valueOf(v) } - writer.write(generate(stringValues, TaskProperties(Map.empty, Map.empty))) - } - - override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { - evaluate(convertValues(values), writer) - } - - override def isStaticTemplate: Boolean = { - SparqlVelocityTemplating.templatingVariables.forall { variableName => - variableMethodUsages(variableName).isEmpty - } - } } case class TemplateVariableMethodUsage(rowMethod: String, parameterValue: String) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala index 15814fd51e..3c545f7ef0 100644 --- 
a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala @@ -1,26 +1,21 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating import org.apache.jena.graph.NodeFactory -import org.apache.jena.update.UpdateFactory -import org.silkframework.entity.EntitySchema -import org.silkframework.entity.paths.UntypedPath -import org.silkframework.execution.local.EmptyEntityTable import org.silkframework.rule.util.JenaSerializationUtil import org.silkframework.runtime.plugin.annotations.Plugin -import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateVariableValue} +import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateVariableName, TemplateVariableValue} import org.silkframework.runtime.validation.ValidationException import org.silkframework.util.Uri import java.io.Writer import scala.collection.mutable.ArrayBuffer -import scala.util.Try import scala.util.matching.Regex /** * A simple SPARQL Update templating engine that supports plain literal and URI placeholders. */ @Plugin( - id = "sparqlSimple", + id = SparqlSimpleTemplateEngine.id, label = "Simple SPARQL", description = "A simple SPARQL Update templating engine that supports plain literal and URI placeholders." ) @@ -31,45 +26,32 @@ case class SparqlSimpleTemplateEngine() extends TemplateEngine { } } +object SparqlSimpleTemplateEngine { + final val id = "sparqlSimple" +} + /** * A compiled simple SPARQL Update template that can only render plain literals and URIs. 
* Example: * * DELETE DATA { ${} rdf:label ${"PROP_FROM_ENTITY_SCHEMA2"} } */ -class SparqlSimpleCompiledTemplate(val sparqlUpdateTemplate: String) extends SparqlCompiledTemplate { +class SparqlSimpleCompiledTemplate(val sparqlUpdateTemplate: String) extends CompiledTemplate { - /** Validate the generated SPARQL of the template and check for batch execution characteristics */ - override def validate(batchSize: Int): Unit = { - val sparql = (sparqlUpdateTemplateParts map { - case SparqlUpdateTemplatePlainLiteralPlaceholder(prop) => - validateUri(prop) - "\"placeholder value\"" - case SparqlUpdateTemplateURIPlaceholder(prop) => - validateUri(prop) - "" - case SparqlUpdateTemplateStaticPart(partialSparql) => - partialSparql - }).mkString - Try(UpdateFactory.create(sparql)).failed.toOption foreach { parseError => - throw new ValidationException("The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + - parseError.getMessage + ", example query: " + sparql) - } - if(batchSize > 1) { - val batchSparql = sparql + "\n" + sparql - Try(UpdateFactory.create(batchSparql)).failed.toOption foreach { parseError => - throw new ValidationException("The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + - parseError.getMessage + ", example batch query: " + batchSparql) - } - } + override lazy val variables: Option[Seq[TemplateVariableName]] = Some( + properties.map(p => new TemplateVariableName(p, "")) + ) + + override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { + val stringValues = values.map { case (k, v) => k -> String.valueOf(v) } + writer.write(render(stringValues)) + } + + override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { + evaluate(convertValues(values), writer) } - /** - * Generates The SPARQL Update query based on the placeholder assignments. 
- * @param placeholderAssignments For each placeholder in the query template - * @return - */ - override def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { + private def render(placeholderAssignments: Map[String, String]): String = { def assignmentValue(prop: String): String = placeholderAssignments.get(prop) match { case Some(value) => value case None => throw new ValidationException(s"No value assignment for placeholder property $prop") @@ -87,15 +69,6 @@ class SparqlSimpleCompiledTemplate(val sparqlUpdateTemplate: String) extends Spa }).mkString } - override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { - val stringValues = values.map { case (k, v) => k -> String.valueOf(v) } - writer.write(generate(stringValues, TaskProperties(Map.empty, Map.empty))) - } - - override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { - evaluate(convertValues(values), writer) - } - private def validateUri(uri: String): Unit = { Uri(uri).toURI.failed.toOption foreach { failure => throw new ValidationException(s"URI $uri used in SPARQL Update template is not a valid URI (relative or absolute)", failure) @@ -162,22 +135,10 @@ class SparqlSimpleCompiledTemplate(val sparqlUpdateTemplate: String) extends Spa templateParts.toSeq } - override def inputSchema: EntitySchema = { - if (isStaticTemplate) { - EmptyEntityTable.schema // Static template, no input data needed - } else { - EntitySchema("", properties.map(p => UntypedPath(p).asUntypedValueType).toIndexedSeq) - } - } - private val properties: Seq[String] = sparqlUpdateTemplateParts. filter(_.isInstanceOf[SparqlUpdateTemplatePlaceholder]). map(_.asInstanceOf[SparqlUpdateTemplatePlaceholder].prop). 
distinct - - override def isStaticTemplate: Boolean = { - properties.isEmpty - } } sealed trait SparqlUpdateTemplatePart diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingMode.java b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingMode.java deleted file mode 100644 index 48c9cbaf73..0000000000 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingMode.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.silkframework.plugins.dataset.rdf.tasks.templating; - -import org.silkframework.runtime.plugin.types.EnumerationParameterType; - -/** - * - */ -public enum SparqlUpdateTemplatingMode implements EnumerationParameterType { - simple("simple", "Simple"), - velocity("velocity", "Velocity Engine"); - - private String id; - private String displayName; - - SparqlUpdateTemplatingMode(String id, String displayName) { - this.id = id; - this.displayName = displayName; - } - - public String id() { - return id; - } - - public String displayName() { - return displayName; - } -} \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala index aa9477b361..3e50750ed2 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala @@ -8,7 +8,7 @@ import org.silkframework.execution.{ExecutionReport, ExecutorOutput} import org.silkframework.execution.local.{GenericEntityTable, LocalEntities, LocalExecution} import 
org.silkframework.plugins.dataset.rdf.executors.LocalSparqlUpdateExecutor import org.silkframework.plugins.dataset.rdf.tasks.SparqlUpdateCustomTask -import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlUpdateTemplatingMode +import org.silkframework.plugins.dataset.rdf.tasks.templating.{SparqlSimpleTemplateEngine, SparqlVelocityTemplateEngine} import org.silkframework.runtime.activity.{ActivityContext, UserContext} import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, TestPluginContext} import org.silkframework.runtime.validation.ValidationException @@ -96,7 +96,7 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestW it should "output one UPDATE query per input task when the template contains input property placeholders" in { val templateWithInputPropertyPlaceholders = """INSERT DATA { $inputProperties.uri("graph") $inputProperties.plainLiteral("graph") };""" val result = executeTask(templateWithInputPropertyPlaceholders, Seq(mockInputTable(Seq("graph" -> "g1")), - mockInputTable(Seq("graph" -> "g2"))), SparqlUpdateTemplatingMode.velocity) + mockInputTable(Seq("graph" -> "g2"))), SparqlVelocityTemplateEngine.id) result.entities.map(_.values.flatten.head).toList mustBe List("INSERT DATA { \"g1\" };\n" + "INSERT DATA { \"g2\" };") } @@ -104,20 +104,20 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestW it should "output one UPDATE query overall even for multiple inputs when no placeholder is used at all" in { val staticTemplate = """INSERT DATA { "1" };""" val result = executeTask(staticTemplate, Seq(mockInputTable(Seq("graph" -> "g1")), - mockInputTable(Seq("graph" -> "g2"))), SparqlUpdateTemplatingMode.velocity) + mockInputTable(Seq("graph" -> "g2"))), SparqlVelocityTemplateEngine.id) result.entities.map(_.values.flatten.head).toList mustBe List(staticTemplate) } private def sparqlUpdateTask(template: String, - mode: SparqlUpdateTemplatingMode): 
Task[SparqlUpdateCustomTask] = { - project.updateTask("task", SparqlUpdateCustomTask(template, batchSize = batchSize, templatingMode = mode)) + language: String): Task[SparqlUpdateCustomTask] = { + project.updateTask("task", SparqlUpdateCustomTask(template, batchSize = batchSize, language = language)) } private def executeTask(template: String, input: Seq[GenericEntityTable], - mode: SparqlUpdateTemplatingMode = SparqlUpdateTemplatingMode.simple, + language: String = SparqlSimpleTemplateEngine.id, activityContext: ActivityContext[ExecutionReport] = context): LocalEntities = { - val result = executor.execute(sparqlUpdateTask(template, mode), input, ExecutorOutput.empty, LocalExecution(true), activityContext) + val result = executor.execute(sparqlUpdateTask(template, language), input, ExecutorOutput.empty, LocalExecution(true), activityContext) result mustBe defined result.get } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala index 5b9c3847d5..65a7789a41 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala @@ -83,7 +83,7 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { def parse(sparqlUpdateTemplate: String, batchSize: Int = 2): Seq[SparqlUpdateTemplatePart] = { val compiled = SparqlSimpleTemplateEngine().compile(sparqlUpdateTemplate) - compiled.validate(batchSize) + new SparqlCompiledTemplate(compiled).validate(batchSize) compiled.sparqlUpdateTemplateParts } } diff --git 
a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala index 28fd3f3cf7..e8a6713742 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala @@ -1,77 +1,78 @@ -package org.silkframework.plugins.dataset.rdf.tasks.templating - -import org.apache.jena.vocabulary.XSD -import org.silkframework.runtime.validation.ValidationException -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.must.Matchers - -class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { - behavior of "Velocity SPARQL Templating Engine" - - private val sparqlUpdateTemplate = - s"""PREFIX rdf: - |PREFIX xsd: <${XSD.getURI}> - |DELETE DATA { $$row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $$row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") } ; - |INSERT DATA { $$row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $$row.plainLiteral("PROP_FROM_ENTITY_SCHEMA3")^^xsd:int } ;""".stripMargin - - it should "output the correct input paths of the template" in { - val templateString = - """ - |$row.uri("subject") - |#if ( $row.exists("somePath") ) - | Plain: $row.plainLiteral("somePath") - | Raw: $row.rawUnsafe("trustedValuePath") - |#end - |""".stripMargin - val compiled = SparqlVelocityTemplateEngine().compile(templateString) - compiled.inputPaths().sorted mustBe Seq("somePath", "subject", "trustedValuePath") - } - - private val templateWithLogic = s"""PREFIX xsd: <${XSD.getURI}> - |INSERT DATA { - | "entity 1" . 
- | #if ($$row.exists("input1")) - | $$row.uri("input1") $$row.plainLiteral("input2")^^xsd:string - | #end - |}; - |""".stripMargin - - it should "validate without problems for valid templates" in { - validate(sparqlUpdateTemplate) - SparqlVelocityTemplateEngine().compile(templateWithLogic).inputPaths().sorted mustBe Seq("input1", "input2") - validate(templateWithLogic) - } - - it should "always validate templates as correct if rawUnsafe() is used, because there is no way to generate meaningful examples to validate" in { - validate("""Completely broken SPARQL Update query with $row.rawUnsafe("something")""") - } - - it should "raise a validation error when the template is invalid" in { - intercept[ValidationException] { - validate("""DELETE DATA { $row.uri("test") rdf:label } ;""") - } - intercept[ValidationException] { - validate("""DELETE DATA { rdf:label $row.uri(3) ;""") - } - intercept[ValidationException] { - // No rdf prefix defined - validate("""DELETE DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") } ; - | INSERT DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA3") } ;""".stripMargin) - } - intercept[ValidationException] { - validate("""PREFIX foaf: - | - |WITH - |DELETE { ?person ?property ?value } - |WHERE { ?person ?property ?value ; foaf:givenName 'Fred } ;""".stripMargin) // Missing closing ' for literal - } - validate(sparqlUpdateTemplate.dropRight(1), batchSize = 1) // Dropped ';' at the end, not batch supported, but batch size is 1 - intercept[ValidationException] { - validate(sparqlUpdateTemplate.dropRight(1)) // Dropped ';' at the end, not batch supported - } - } - - def validate(template: String, batchSize: Int = 2): Unit = { - SparqlVelocityTemplateEngine().compile(template).validate(batchSize) - } -} +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.apache.jena.vocabulary.XSD + +import 
org.silkframework.runtime.validation.ValidationException +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.must.Matchers + +class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { + behavior of "Velocity SPARQL Templating Engine" + + private val sparqlUpdateTemplate = + s"""PREFIX rdf: + |PREFIX xsd: <${XSD.getURI}> + |DELETE DATA { $$row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $$row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") } ; + |INSERT DATA { $$row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $$row.plainLiteral("PROP_FROM_ENTITY_SCHEMA3")^^xsd:int } ;""".stripMargin + + it should "output the correct input paths of the template" in { + val templateString = + """ + |$row.uri("subject") + |#if ( $row.exists("somePath") ) + | Plain: $row.plainLiteral("somePath") + | Raw: $row.rawUnsafe("trustedValuePath") + |#end + |""".stripMargin + val compiled = SparqlVelocityTemplateEngine().compile(templateString) + compiled.variables.get.filter(_.scope.isEmpty).map(_.name).sorted mustBe Seq("somePath", "subject", "trustedValuePath") + } + + private val templateWithLogic = s"""PREFIX xsd: <${XSD.getURI}> + |INSERT DATA { + | "entity 1" . 
+ | #if ($$row.exists("input1")) + | $$row.uri("input1") $$row.plainLiteral("input2")^^xsd:string + | #end + |}; + |""".stripMargin + + it should "validate without problems for valid templates" in { + validate(sparqlUpdateTemplate) + SparqlVelocityTemplateEngine().compile(templateWithLogic).variables.get.filter(_.scope.isEmpty).map(_.name).sorted mustBe Seq("input1", "input2") + validate(templateWithLogic) + } + + it should "always validate templates as correct if rawUnsafe() is used, because there is no way to generate meaningful examples to validate" in { + validate("""Completely broken SPARQL Update query with $row.rawUnsafe("something")""") + } + + it should "raise a validation error when the template is invalid" in { + intercept[ValidationException] { + validate("""DELETE DATA { $row.uri("test") rdf:label } ;""") + } + intercept[ValidationException] { + validate("""DELETE DATA { rdf:label $row.uri(3) ;""") + } + intercept[ValidationException] { + // No rdf prefix defined + validate("""DELETE DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") } ; + | INSERT DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA3") } ;""".stripMargin) + } + intercept[ValidationException] { + validate("""PREFIX foaf: + | + |WITH + |DELETE { ?person ?property ?value } + |WHERE { ?person ?property ?value ; foaf:givenName 'Fred } ;""".stripMargin) // Missing closing ' for literal + } + validate(sparqlUpdateTemplate.dropRight(1), batchSize = 1) // Dropped ';' at the end, not batch supported, but batch size is 1 + intercept[ValidationException] { + validate(sparqlUpdateTemplate.dropRight(1)) // Dropped ';' at the end, not batch supported + } + } + + def validate(template: String, batchSize: Int = 2): Unit = { + new SparqlCompiledTemplate(SparqlVelocityTemplateEngine().compile(template)).validate(batchSize) + } +} From 068f6a7ff83e8691ab4b21bbee1f2e85cbbab51a Mon Sep 17 00:00:00 2001 From: Robert 
Isele Date: Wed, 18 Mar 2026 11:41:52 +0100 Subject: [PATCH 04/63] Fix SPARQL templating. Also make it backwards-compatible with existing projects. --- .../runtime/templating/TemplateEngine.scala | 2 + .../rdf/tasks/SparqlUpdateCustomTask.scala | 4 +- .../templating/SparqlCompiledTemplate.scala | 51 +++++++++---------- .../SparqlTemplatingEngineVelocity.scala | 28 +++++----- .../SparqlUpdateTemplatingEngineSimple.scala | 2 +- .../rdf/LocalSparqlUpdateExecutorTest.scala | 2 +- 6 files changed, 44 insertions(+), 45 deletions(-) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala index 0d6c20185f..1b5985ed31 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala @@ -24,6 +24,8 @@ trait TemplateEngine extends AnyPlugin { */ trait CompiledTemplate { + def usesRawUnsafe(): Boolean = false + /** * Holds all unbound variables in the template. * Returns None, if this functionality is not supported. diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index f2aa211c76..fbbcce1442 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -32,12 +32,12 @@ case class SparqlUpdateCustomTask( value = "The templating mode for the template engine. 
See the general documentation of this plugin for further details on the features of each template engine.", autoCompletionProvider = classOf[TemplateEngineAutocompletionProvider] ) - language: String = SparqlSimpleTemplateEngine.id + templatingMode: String = SparqlSimpleTemplateEngine.id ) extends CustomTask { assert(batchSize >= 1, "Batch size must be greater zero!") val compiledTemplate: SparqlCompiledTemplate = { - val templateEngine = TemplateEngines.create(language) + val templateEngine = TemplateEngines.create(templatingMode) new SparqlCompiledTemplate(templateEngine.compile(sparqlUpdateTemplate.str)) } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala index 73f7ab2958..aa6b7df8d0 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala @@ -33,34 +33,31 @@ class SparqlCompiledTemplate(delegate: CompiledTemplate) extends CompiledTemplat /** Validates the template, including batch validation if batchSize > 1. */ def validate(batchSize: Int): Unit = { - // Skip validation if variables cannot be determined (e.g. 
Velocity templates using rawUnsafe) - delegate.variables match { - case None => return - case Some(_) => - } - val genericUri = "urn:generic:1" - val entityVariables = entityVariableNames - val assignments = entityVariables.map(_ -> genericUri).toMap - val inputPropVars = taskPropertyVariableNames("inputProperties").map(_ -> genericUri).toMap - val outputPropVars = taskPropertyVariableNames("outputProperties").map(_ -> genericUri).toMap - val taskProps = TaskProperties(inputPropVars, outputPropVars) - val sparqlQuery = Try(generate(assignments, taskProps)) match { - case Failure(exception) => - throw new ValidationException( - "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) - case Success(value) => value - } - Try(UpdateFactory.create(sparqlQuery)).failed.toOption.foreach { parseError => - throw new ValidationException( - "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + - parseError.getMessage + ", example query: " + sparqlQuery) - } - if (batchSize > 1) { - val batchSparql = sparqlQuery + "\n" + sparqlQuery - Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => + if(!delegate.usesRawUnsafe()) { + val genericUri = "urn:generic:1" + val entityVariables = entityVariableNames + val assignments = entityVariables.map(_ -> genericUri).toMap + val inputPropVars = taskPropertyVariableNames("inputProperties").map(_ -> genericUri).toMap + val outputPropVars = taskPropertyVariableNames("outputProperties").map(_ -> genericUri).toMap + val taskProps = TaskProperties(inputPropVars, outputPropVars) + val sparqlQuery = Try(generate(assignments, taskProps)) match { + case Failure(exception) => + throw new ValidationException( + "The SPARQL Update template could not be rendered with example values. 
Error message: " + exception.getMessage, exception) + case Success(value) => value + } + Try(UpdateFactory.create(sparqlQuery)).failed.toOption.foreach { parseError => throw new ValidationException( - "The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + - parseError.getMessage + ", example batch query: " + batchSparql) + "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + + parseError.getMessage + ", example query: " + sparqlQuery) + } + if (batchSize > 1) { + val batchSparql = sparqlQuery + "\n" + sparqlQuery + Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + + parseError.getMessage + ", example batch query: " + batchSparql) + } } } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala index 33b5cfab7a..9c240d00f3 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala @@ -1,11 +1,14 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating +import org.apache.jena.update.UpdateFactory import org.apache.velocity.runtime.parser.node._ import org.silkframework.runtime.plugin.annotations.Plugin import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateVariableName, TemplateVariableValue} +import org.silkframework.runtime.validation.ValidationException -import 
java.io.Writer +import java.io.{StringWriter, Writer} import scala.collection.JavaConverters._ +import scala.util.{Failure, Success, Try} /** * A SPARQL Update templating engine based on Velocity. @@ -23,27 +26,24 @@ case class SparqlVelocityTemplateEngine() extends TemplateEngine { } object SparqlVelocityTemplateEngine { - final val id = "sparqlVelocity" + final val id = "velocity" } /** * A compiled SPARQL Update template based on Velocity. */ class SparqlVelocityCompiledTemplate(val sparqlUpdateTemplate: String) extends CompiledTemplate { + private val sparqlTemplate = SparqlVelocityTemplating.createTemplate(sparqlUpdateTemplate) override lazy val variables: Option[Seq[TemplateVariableName]] = { - if (usesRawUnsafe()) { - None - } else { - val rowVars = variableMethodUsages(SparqlVelocityTemplating.ROW_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue, "")) - val inputPropVars = variableMethodUsages(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue, "inputProperties")) - val outputPropVars = variableMethodUsages(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue, "outputProperties")) - Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) - } + val rowVars = variableMethodUsages(SparqlVelocityTemplating.ROW_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, "")) + val inputPropVars = variableMethodUsages(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, "inputProperties")) + val outputPropVars = variableMethodUsages(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, "outputProperties")) + Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) } override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { @@ -76,7 +76,7 @@ class SparqlVelocityCompiledTemplate(val sparqlUpdateTemplate: 
String) extends C } } - private def usesRawUnsafe(): Boolean = { + override def usesRawUnsafe(): Boolean = { SparqlVelocityTemplating.templatingVariables.exists { variableName => variableMethodUsages(variableName).exists(_.rowMethod == rawUnsafeMethodName) } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala index 3c545f7ef0..510b864b10 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala @@ -27,7 +27,7 @@ case class SparqlSimpleTemplateEngine() extends TemplateEngine { } object SparqlSimpleTemplateEngine { - final val id = "sparqlSimple" + final val id = "simple" } /** diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala index 3e50750ed2..6f4389d9d7 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala @@ -110,7 +110,7 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestW private def sparqlUpdateTask(template: String, language: String): Task[SparqlUpdateCustomTask] = { - project.updateTask("task", SparqlUpdateCustomTask(template, batchSize = batchSize, language = language)) + project.updateTask("task", SparqlUpdateCustomTask(template, batchSize = 
batchSize, templatingMode = language)) } private def executeTask(template: String, From e5fe97ea5026710cef047f13df2c9724a52e6c1a Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 18 Mar 2026 12:01:08 +0100 Subject: [PATCH 05/63] SparqlTemplate refactoring. Move the very specific usesRawUnsafe out of the general CompiledTemplate class. --- .../runtime/templating/TemplateEngine.scala | 2 - .../rdf/tasks/SparqlUpdateCustomTask.scala | 4 +- ...ledTemplate.scala => SparqlTemplate.scala} | 76 +++++++++---------- .../SparqlTemplatingEngineVelocity.scala | 11 +-- ...arqlUpdateTemplatingEngineSimpleTest.scala | 2 +- .../SparqlTemplatingEngineVelocityTest.scala | 2 +- 6 files changed, 44 insertions(+), 53 deletions(-) rename silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/{SparqlCompiledTemplate.scala => SparqlTemplate.scala} (54%) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala index 1b5985ed31..0d6c20185f 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala @@ -24,8 +24,6 @@ trait TemplateEngine extends AnyPlugin { */ trait CompiledTemplate { - def usesRawUnsafe(): Boolean = false - /** * Holds all unbound variables in the template. * Returns None, if this functionality is not supported.
diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index fbbcce1442..45541f964f 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -36,9 +36,9 @@ case class SparqlUpdateCustomTask( ) extends CustomTask { assert(batchSize >= 1, "Batch size must be greater zero!") - val compiledTemplate: SparqlCompiledTemplate = { + val compiledTemplate: SparqlTemplate = { val templateEngine = TemplateEngines.create(templatingMode) - new SparqlCompiledTemplate(templateEngine.compile(sparqlUpdateTemplate.str)) + new SparqlTemplate(templateEngine.compile(sparqlUpdateTemplate.str)) } compiledTemplate.validate(batchSize) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala similarity index 54% rename from silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala rename to silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index aa6b7df8d0..ba3eb3ca17 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlCompiledTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -4,17 +4,17 @@ import org.apache.jena.update.UpdateFactory import org.silkframework.entity.EntitySchema import 
org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable -import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateVariableValue} +import org.silkframework.runtime.templating.CompiledTemplate import org.silkframework.runtime.validation.ValidationException -import java.io.{StringWriter, Writer} -import scala.collection.JavaConverters._ +import java.io.StringWriter +import scala.jdk.CollectionConverters.{MapHasAsJava, MapHasAsScala} import scala.util.{Failure, Success, Try} /** * Wraps a [[CompiledTemplate]] and adds SPARQL Update specific capabilities. */ -class SparqlCompiledTemplate(delegate: CompiledTemplate) extends CompiledTemplate { +class SparqlTemplate(template: CompiledTemplate) { /** Renders the template based on the variable assignments. */ def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { @@ -27,38 +27,42 @@ class SparqlCompiledTemplate(delegate: CompiledTemplate) extends CompiledTemplat values.put("outputProperties", taskProperties.outputTask.asJava) } val writer = new StringWriter() - delegate.evaluate(values.asScala.toMap, writer) + template.evaluate(values.asScala.toMap, writer) writer.toString } /** Validates the template, including batch validation if batchSize > 1. 
*/ def validate(batchSize: Int): Unit = { - if(!delegate.usesRawUnsafe()) { - val genericUri = "urn:generic:1" - val entityVariables = entityVariableNames - val assignments = entityVariables.map(_ -> genericUri).toMap - val inputPropVars = taskPropertyVariableNames("inputProperties").map(_ -> genericUri).toMap - val outputPropVars = taskPropertyVariableNames("outputProperties").map(_ -> genericUri).toMap - val taskProps = TaskProperties(inputPropVars, outputPropVars) - val sparqlQuery = Try(generate(assignments, taskProps)) match { - case Failure(exception) => - throw new ValidationException( - "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) - case Success(value) => value - } - Try(UpdateFactory.create(sparqlQuery)).failed.toOption.foreach { parseError => - throw new ValidationException( - "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + - parseError.getMessage + ", example query: " + sparqlQuery) - } - if (batchSize > 1) { - val batchSparql = sparqlQuery + "\n" + sparqlQuery - Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => + template match { + case compiled: SparqlVelocityCompiledTemplate if compiled.usesRawUnsafe() => + // We cannot generate meaningful example values for the template if $row.rawUnsafe() is used, because it could generate arbitrary SPARQL syntax. 
+ case _ => + // Generate example input assignments + val genericUri = "urn:generic:1" + val entityVariables = entityVariableNames + val assignments = entityVariables.map(_ -> genericUri).toMap + val inputPropVars = taskPropertyVariableNames("inputProperties").map(_ -> genericUri).toMap + val outputPropVars = taskPropertyVariableNames("outputProperties").map(_ -> genericUri).toMap + val taskProps = TaskProperties(inputPropVars, outputPropVars) + val sparqlQuery = Try(generate(assignments, taskProps)) match { + case Failure(exception) => + throw new ValidationException( + "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) + case Success(value) => value + } + Try(UpdateFactory.create(sparqlQuery)).failed.toOption.foreach { parseError => throw new ValidationException( - "The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + - parseError.getMessage + ", example batch query: " + batchSparql) + "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + + parseError.getMessage + ", example query: " + sparqlQuery) + } + if (batchSize > 1) { + val batchSparql = sparqlQuery + "\n" + sparqlQuery + Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + + parseError.getMessage + ", example batch query: " + batchSparql) + } } - } } } @@ -74,23 +78,15 @@ class SparqlCompiledTemplate(delegate: CompiledTemplate) extends CompiledTemplat /** True if the given template is static, i.e. contains no placeholder variables. 
*/ def isStaticTemplate: Boolean = { - delegate.variables match { + template.variables match { case Some(vars) => vars.isEmpty case None => false } } - override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { - delegate.evaluate(values, writer) - } - - override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { - delegate.evaluate(values, writer, evaluationConfig) - } - /** Returns entity variable names (those with empty scope). */ private def entityVariableNames: Seq[String] = { - delegate.variables match { + template.variables match { case Some(vars) => vars.filter(_.scope.isEmpty).map(_.name).distinct case None => @@ -100,7 +96,7 @@ class SparqlCompiledTemplate(delegate: CompiledTemplate) extends CompiledTemplat /** Returns variable names for a specific scope (e.g. "inputProperties", "outputProperties"). */ private def taskPropertyVariableNames(scope: String): Seq[String] = { - delegate.variables match { + template.variables match { case Some(vars) => vars.filter(_.scope == scope).map(_.name).distinct case None => diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala index 9c240d00f3..b4491a312f 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala @@ -1,14 +1,11 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating -import org.apache.jena.update.UpdateFactory import org.apache.velocity.runtime.parser.node._ import org.silkframework.runtime.plugin.annotations.Plugin -import 
org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateVariableName, TemplateVariableValue} -import org.silkframework.runtime.validation.ValidationException +import org.silkframework.runtime.templating._ -import java.io.{StringWriter, Writer} -import scala.collection.JavaConverters._ -import scala.util.{Failure, Success, Try} +import java.io.Writer +import scala.jdk.CollectionConverters.MapHasAsScala /** * A SPARQL Update templating engine based on Velocity. @@ -76,7 +73,7 @@ class SparqlVelocityCompiledTemplate(val sparqlUpdateTemplate: String) extends C } } - override def usesRawUnsafe(): Boolean = { + def usesRawUnsafe(): Boolean = { SparqlVelocityTemplating.templatingVariables.exists { variableName => variableMethodUsages(variableName).exists(_.rowMethod == rawUnsafeMethodName) } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala index 65a7789a41..de7f2c786d 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala @@ -83,7 +83,7 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { def parse(sparqlUpdateTemplate: String, batchSize: Int = 2): Seq[SparqlUpdateTemplatePart] = { val compiled = SparqlSimpleTemplateEngine().compile(sparqlUpdateTemplate) - new SparqlCompiledTemplate(compiled).validate(batchSize) + new SparqlTemplate(compiled).validate(batchSize) compiled.sparqlUpdateTemplateParts } } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala 
b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala index e8a6713742..262d03628c 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala @@ -73,6 +73,6 @@ class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { } def validate(template: String, batchSize: Int = 2): Unit = { - new SparqlCompiledTemplate(SparqlVelocityTemplateEngine().compile(template)).validate(batchSize) + new SparqlTemplate(SparqlVelocityTemplateEngine().compile(template)).validate(batchSize) } } From 56b4a5625d7a2cf28001a4e74173f178c64ebc45 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Fri, 20 Mar 2026 17:55:30 +0100 Subject: [PATCH 06/63] Move JinjaTemplateEngine to its own silk module so that it can be used by the silk-rdf package.
--- build.sbt | 12 +- ....silkframework.runtime.plugin.PluginModule | 1 + .../jinja/JinjaTemplateEngine.scala | 132 ++++++++++++ .../jinja/JinjaTemplatingPlugins.scala | 7 + .../jinja/JinjaVariableCollector.scala | 163 +++++++++++++++ .../templating/jinja/TransformFilters.scala | 53 +++++ .../templating/jinja/JinjaEngineTest.scala | 189 ++++++++++++++++++ .../jinja/JinjaVariableCollectorTest.scala | 120 +++++++++++ 8 files changed, 675 insertions(+), 2 deletions(-) create mode 100644 silk-plugins/silk-plugins-templating-jinja/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule create mode 100644 silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala create mode 100644 silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplatingPlugins.scala create mode 100644 silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala create mode 100644 silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/TransformFilters.scala create mode 100644 silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaEngineTest.scala create mode 100644 silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala diff --git a/build.sbt b/build.sbt index 92b78e1ad5..afcf59a8ba 100644 --- a/build.sbt +++ b/build.sbt @@ -147,6 +147,14 @@ lazy val workspace = (project in file("silk-workspace")) // Plugins ////////////////////////////////////////////////////////////////////////////// +lazy val pluginsTemplatingJinja = (project in file("silk-plugins/silk-plugins-templating-jinja")) + .dependsOn(rules % "compile->compile;test->test", workbenchCore) + .settings(commonSettings *) + .settings( + name := "Silk 
Plugins Templating Jinja", + libraryDependencies += "com.hubspot.jinjava" % "jinjava" % "2.8.3" + ) + lazy val pluginsRdf = (project in file("silk-plugins/silk-plugins-rdf")) .dependsOn(rules, workspace % "test->test;compile->compile", core % "test->test;compile->compile", pluginsCsv % "test->compile") .settings(commonSettings *) @@ -222,8 +230,8 @@ lazy val persistentCaching = (project in file("silk-plugins/silk-persistent-cach // Aggregate all plugins lazy val plugins = (project in file("silk-plugins")) - .dependsOn(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, serializationJson, persistentCaching) - .aggregate(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, serializationJson, persistentCaching) + .dependsOn(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, serializationJson, persistentCaching, pluginsTemplatingJinja) + .aggregate(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, serializationJson, persistentCaching, pluginsTemplatingJinja) .settings(commonSettings *) .settings( name := "Silk Plugins" diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule b/silk-plugins/silk-plugins-templating-jinja/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule new file mode 100644 index 0000000000..4d8fd8f332 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule @@ -0,0 +1 @@ +org.silkframework.plugins.templating.jinja.JinjaTemplatingPlugins diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala new file mode 100644 index 0000000000..91e11ab0ab --- /dev/null +++ 
b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala @@ -0,0 +1,132 @@ +package org.silkframework.plugins.templating.jinja + +import com.hubspot.jinjava.interpret.{InterpretException, JinjavaInterpreter, UnknownTokenException} +import com.hubspot.jinjava.tree.Node +import com.hubspot.jinjava.{Jinjava, JinjavaConfig} +import org.silkframework.runtime.plugin.annotations.Plugin +import org.silkframework.runtime.templating.exceptions.{TemplateEvaluationException, UnboundVariablesException} +import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateVariableName, TemplateVariableValue} + +import java.io.Writer +import java.util.EmptyStackException +import scala.jdk.CollectionConverters.ListHasAsScala +import scala.util.control.Breaks.{break, breakable} + +@Plugin( + id = JinjaTemplateEngine.id, + label = "Jinja" +) +case class JinjaTemplateEngine() extends TemplateEngine { + + override def compile(templateString: String): JinjaTemplate = { + new JinjaTemplate(JinjaTemplateEngine.interpreter().parse(templateString)) + } +} + +object JinjaTemplateEngine { + + final val id = "jinja" + + private val interpreters = new ThreadLocal[JinjavaInterpreter] { + override protected def initialValue(): JinjavaInterpreter = { + // There is a bug in Jinja 2.6.0, if a different context class loader is used: https://github.com/HubSpot/jinjava/issues/317 + val curClassLoader = Thread.currentThread.getContextClassLoader + try { + Thread.currentThread.setContextClassLoader(this.getClass.getClassLoader) + val config = JinjavaConfig.newBuilder.withFailOnUnknownTokens(true).build() + val jinja = new Jinjava(config) + TransformFilters.register(jinja.getGlobalContext) + val interpreter = jinja.newInterpreter() + JinjavaInterpreter.pushCurrent(interpreter) // Macros will request the current interpreter (thread-local) + interpreter + } finally { + 
Thread.currentThread.setContextClassLoader(curClassLoader) + } + } + } + + /** + * Retrieves an interpreter instance. + */ + def interpreter(): JinjavaInterpreter = { + val inter = interpreters.get() + // We need to reset a number of properties. + // It would be better to change this to create a fresh instance on every call. But then we need to check carefully for memory leaks. + inter.getContext.reset() + inter.getContext.clear() + breakable { + while(true) { + try { + inter.getContext.popRenderStack() + } catch { + case _: EmptyStackException => + break() + } + } + } + do { + inter.removeLastError() + } while(!inter.getLastError.isEmpty) + inter + } + +} + +class JinjaTemplate(val node: Node) extends CompiledTemplate { + + override val variables: Option[Seq[TemplateVariableName]] = { + Some(new JinjaVariableCollector().collect(node).unboundVars) + } + + override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig = EvaluationConfig()): Unit = { + // Check if values for all variables are provided + // We do this explicitly because the Jinja-internal checks are not sufficient + // (The implementation ignores expressions with filters and only returns the first missing var) + var missingVars: Seq[TemplateVariableName] = Seq.empty + for (vars <- variables) { + // Collect all scoped variables of the form 'scope.name' + val names = values.map(_.asName) + // Variables of the form 'scope.name' can also be addressed as just 'scope' + val scopes = values.map(v => new TemplateVariableName(v.scope, "")) + // Find missing vars + val existingVars = (names ++ scopes).toSet + missingVars = vars.filterNot(existingVars.contains) + } + if (missingVars.nonEmpty) { + if(evaluationConfig.ignoreUnboundVariables) { + // Leave unbound variables as they are in the result. 
+ val extendedValues = values ++ missingVars.map(mv => new TemplateVariableValue(mv.name, mv.scope, Seq(mv.scopedName))) + evaluate(convertValues(extendedValues), writer) + } else { + throw new UnboundVariablesException(missingVars) + } + } else { + evaluate(convertValues(values), writer) + } + } + + override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { + // Render the template + val interpreter = JinjaTemplateEngine.interpreter() + for ((key, value) <- values) { + interpreter.getContext.put(key, value) + } + try { + writer.write(interpreter.render(node, false)) + } catch { + case ex: UnknownTokenException => + throw new UnboundVariablesException(Seq(TemplateVariableName.parse(ex.getToken)), Some(ex)) + case ex: InterpretException => + throw new TemplateEvaluationException(ex.getMessage, Some(ex)) + } + + // For now, we just throw any errors. In the future, we could improve this and add an error collector. + if (!interpreter.getErrors.isEmpty) { + val msg = "Errors in template: " + interpreter.getErrors.asScala.map(_.getMessage).mkString(" ") + val cause = Option(interpreter.getErrors.get(0).getException) + throw new TemplateEvaluationException(msg, cause) + } + } + + +} diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplatingPlugins.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplatingPlugins.scala new file mode 100644 index 0000000000..b7b6091c13 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplatingPlugins.scala @@ -0,0 +1,7 @@ +package org.silkframework.plugins.templating.jinja + +import org.silkframework.runtime.plugin.{AnyPlugin, PluginModule} + +class JinjaTemplatingPlugins extends PluginModule { + override def pluginClasses: Seq[Class[_ <: AnyPlugin]] = Seq(classOf[JinjaTemplateEngine]) +} diff --git 
a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala new file mode 100644 index 0000000000..b96ba155f6 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala @@ -0,0 +1,163 @@ +package org.silkframework.plugins.templating.jinja + +import com.hubspot.jinjava.el.ExtendedSyntaxBuilder +import com.hubspot.jinjava.lib.tag._ +import com.hubspot.jinjava.tree.parse.ExpressionToken +import com.hubspot.jinjava.tree.{ExpressionNode, Node, TagNode} +import com.hubspot.jinjava.util.HelperStringTokenizer +import jinjava.de.odysseus.el.tree.TreeBuilderException +import jinjava.de.odysseus.el.tree.impl.ast.{AstDot, AstEval} +import org.silkframework.runtime.templating.TemplateVariableName + +import scala.collection.immutable.ArraySeq +import scala.jdk.CollectionConverters.{IterableHasAsScala, ListHasAsScala} + +/** + * Collects all referenced variables in a Jinja template. + */ +class JinjaVariableCollector { + + private val EXPRESSION_START_TOKEN = "#{" + private val EXPRESSION_END_TOKEN = "}" + + private val builder = new ExtendedSyntaxBuilder + + /** + * Collects all variable names from a Jinja template node. + */ + def collect(node: Node, scope: Scope = Scope.empty): Scope = { + node match { + case tagNode: TagNode => + collectFromTag(tagNode, scope) + case exprNode: ExpressionNode => + scope ++ collectFromExpression(exprNode.getMaster.asInstanceOf[ExpressionToken].getExpr) + case _ => + collectFromChildren(node, scope) + } + } + + /** + * Collects all variable names from a Jinja template tag. + * Needs to copy code from the individual tags to replicate behaviour. 
+ */ + private def collectFromTag(tagNode: TagNode, scope: Scope): Scope = { + tagNode.getTag match { + case _: IfTag | _: ElseIfTag | _: DoTag => + scope ++ collectFromExpression(tagNode.getHelpers) ++ collectFromChildren(tagNode, scope) + case _: ForTag => + // Parses expressions of the form "loopVars in loopedVars" + val parts = tagNode.getHelpers.split("\\s+in\\s+") + if (parts.length == 2) { + val loopVars = new HelperStringTokenizer(parts(0)).splitComma(true).allTokens + val loopedVars = collectFromExpression(parts(1)) + val childVars = collectFromChildren(tagNode, scope.withBoundNames(loopVars.asScala.toSeq)) + val filtedChildVars = childVars.unboundVars.filterNot(v => v.scope == "loop" || v.name == "loop" ) + loopedVars.withUnbound(filtedChildVars) + } else { + collectFromChildren(tagNode, scope) + } + case _: SetTag => + val expression = tagNode.getHelpers + val eqPos = expression.indexOf('=') + if(eqPos != -1) { + val leftVars = ArraySeq.unsafeWrapArray(expression.substring(0, eqPos).trim.split("\\s*,\\s*")) + val rightVars = collectFromExpression("[" + expression.substring(eqPos + 1) + "]") + scope.withBoundNames(leftVars) ++ rightVars + } else { + scope + } + case _: MacroTag => + // Add all parameters as bound variables to the scope + val functionScope = scope.withBound(collectFromExpression(tagNode.getHelpers).unboundVars) + // Collect any unbound variables within the macro + collectFromChildren(tagNode, functionScope) + case _ => + collectFromChildren(tagNode, scope) + } + } + + private def collectFromChildren(node: Node, scope: Scope): Scope = { + var curScope = scope + for(child <- node.getChildren.asScala) { + val newScope = collect(child, curScope) + curScope = newScope + } + // Any newly bound variable is not valid outside of this node's children + curScope.copy(boundVars = scope.boundVars) + } + + /** + * Parses an expression from a Jinja template and collects all variable names. 
+ * Expressions are used in tags, such as in if and for expressions. + */ + private def collectFromExpression(expression: String): Scope = { + try { + val tree = builder.build(EXPRESSION_START_TOKEN + expression + EXPRESSION_END_TOKEN) + // Manually treat simple expressions of the form `project.variable` + expression match { + case JinjaVariableCollector.scopedName(scope, name) => + Scope( + unboundVars = Seq(new TemplateVariableName(name, scope)) + ) + case _ => + Scope( + unboundVars = tree.getIdentifierNodes.asScala.map(_.getName).filterNot(ignoreIdentifierNode).toSeq.map(new TemplateVariableName(_, "")) + ) + } + } catch { + case _: TreeBuilderException => + Scope.empty + } + } + + private def ignoreIdentifierNode(name: String): Boolean = { + name.startsWith("___") || // internal identifier + name.startsWith("filter:") || // Jinja filter + name.startsWith("exptest:") // Jinja test + } + + /** + * Holds all bound and unbound variables at a specific node in the AST. + */ + case class Scope(unboundVars: Seq[TemplateVariableName], boundVars: Seq[TemplateVariableName] = Seq.empty) { + + def withBoundNames(varNames: Seq[String]): Scope = { + withBound(varNames.map(new TemplateVariableName(_, ""))) + } + + def withBound(varNames: Seq[TemplateVariableName]): Scope = { + copy(boundVars = (boundVars ++ varNames).distinct) + } + + def withUnbound(varNames: Seq[TemplateVariableName]): Scope = { + copy(unboundVars = (unboundVars ++ varNames).distinct) + } + + /** + * Adds a scope from a subsequent node. 
+ */ + def ++(scope: Scope): Scope = { + val boundVarsSet = boundVars.toSet + Scope( + unboundVars = (unboundVars ++ scope.unboundVars).distinct.filterNot(boundVarsSet), + boundVars = (boundVars ++ scope.boundVars).distinct + ) + } + + } + + object Scope { + def empty: Scope = Scope(Seq.empty, Seq.empty) + } + +} + +object JinjaVariableCollector { + + // Regex for valid variable names + private val variableRegex = "[a-zA-Z_][a-zA-Z0-9_]*".r + + // Regex for scoped names of the form scope.var + private val scopedName = s"($variableRegex)\\.($variableRegex)".r + +} diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/TransformFilters.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/TransformFilters.scala new file mode 100644 index 0000000000..adcda3aaf4 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/TransformFilters.scala @@ -0,0 +1,53 @@ +package org.silkframework.plugins.templating.jinja + +import com.hubspot.jinjava.interpret.{Context, JinjavaInterpreter} +import com.hubspot.jinjava.lib.filter.Filter +import org.silkframework.rule.input.Transformer +import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, PluginDescription, PluginRegistry} +import org.silkframework.runtime.templating.IterableTemplateValues + +/** + * Makes transformer plugins available as Jinja filters. + */ +object TransformFilters { + + /** + * Registers all available transformers as Jinja filters. + */ + def register(context: Context): Unit = { + for(transformerPlugin <- PluginRegistry.availablePlugins[Transformer]) { + if(context.getFilter(transformerPlugin.id) == null) { + context.registerFilter(new TransformFilter(transformerPlugin)) + } + } + } + + /** + * A Jinja filter that is based on a transformer. 
+ */ + class TransformFilter(transformerPlugin: PluginDescription[Transformer]) extends Filter { + + override def getName: String = transformerPlugin.id + + override def filter(value: Any, interpreter: JinjavaInterpreter, args: String*): AnyRef = { + // Create transformer instance with parameters + implicit val pluginContext: PluginContext = PluginContext.empty + val paramValues = + for((param, value) <- transformerPlugin.parameters zip args) yield { + (param.name, value) + } + val transformer = transformerPlugin(ParameterValues.fromStringMap(paramValues.toMap)) + + // Evaluate transformer + val inputValues = value match { + case r: IterableTemplateValues => r.values + case v: Any => Seq(v.toString) + } + val transformedValues = transformer(Seq(inputValues)) + + // Return result + IterableTemplateValues.fromValues(transformedValues) + } + } + +} diff --git a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaEngineTest.scala b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaEngineTest.scala new file mode 100644 index 0000000000..37cde35ce7 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaEngineTest.scala @@ -0,0 +1,189 @@ +package org.silkframework.plugins.templating.jinja + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers +import org.silkframework.runtime.templating.TemplateVariableValue +import org.silkframework.runtime.templating.exceptions.UnboundVariablesException + +import java.io.{StringWriter, Writer} +import scala.collection.immutable.ArraySeq +import scala.jdk.CollectionConverters.{MapHasAsJava, SeqHasAsJava} + +class JinjaEngineTest extends AnyFlatSpec with Matchers { + + behavior of "JinjaEngine" + + it should "fail if variables are not bound" in { + intercept[UnboundVariablesException]( + evaluate( + template = "{{name}} 
{{location}}", + values = Map("firstName"-> Seq("John")) + ) + ).missingVars.map(_.name) shouldBe Seq("name", "location") + + intercept[UnboundVariablesException]( + evaluate( + template = "{{name | lower}}", + values = Map("firstName"-> Seq("John")) + ) + ).missingVars.map(_.name) shouldBe Seq("name") + } + + it should "support transformer plugins to be used as filters" in { + evaluate( + template = "{{name | lowerCase}}", + values = Map("name"-> Seq("John")) + ) shouldBe "john" + + evaluate( + template = "{{names | removeDuplicates | concatMultiValues(', ')}}", + values = Map("names"-> Seq("John", "Max", "John")) + ) shouldBe "John, Max" + } + + it should "support combining built-in filters with DataIntegration transformer filters" in { + evaluate( + template = "{{input | lower | tokenize(',') | join('-')}}", + values = Map("input"-> Seq("A,B,C")) + ) shouldBe "a-b-c" + } + + it should "support complex templates" in { + val template = + """ + | {% for user in users %} + | {% set location, country = city, 'Germany' %} + | {{user}} is from {{location}}, {{country}} + | {% endfor %} + | + |""".stripMargin + + val values = Map( + "users"-> Seq("John", "Max"), + "city" -> Seq("Berlin") + ) + + val expectedLines = Seq( + "John is from Berlin, Germany", + "Max is from Berlin, Germany" + ) + + lines(evaluate(template, values)) shouldBe expectedLines + } + + it should "support templates with macros" in { + val template = + """ {% macro foo() %} + | {{ "hello world" }} + | {% endmacro %} + | {{ foo() }}""".stripMargin + + lines(evaluate(template, Map.empty)) shouldBe Seq("hello world") + } + + it should "support templates with macros with parameters" in { + val template = + """ {% macro foo(name) %} + | Hello {{name}} + | {% endmacro %} + | {{ foo('John') }}""".stripMargin + + lines(evaluate(template, Map.empty)) shouldBe Seq("Hello John") + } + + it should "support loop cycle helper" in { + val template = + """{% for nr in nrs %} + | {{ loop.cycle('odd', 'even') }} {{nr}} 
+ |{% endfor %}""".stripMargin + + lines(evaluate(template, Map("nrs" -> Seq("1", "2")))) shouldBe Seq("odd 1", "even 2") + } + + it should "support call and caller()" in { + val template = """{% macro renderIt(title, class='default') -%} + | {{title}} ({{class}}) [{{caller()}}] + |{%- endmacro %} + | + |{% call renderIt('Titel') %} + |caller text + |{% endcall %}""".stripMargin + + lines(evaluate(template, Map.empty)) shouldBe Seq("Titel (default) [", "caller text", "]") + } + + // FIXME: jinjava does not support filter tags yet + it should "support filter blocks" ignore { + val template = """{% filter upper %} + | to upper + |{% endfilter %}""".stripMargin + evaluate(template, Map.empty).trim shouldBe "TO UPPER" + } + + it should "set and use variables" in { + val template = + """{% set newVar = "new var" %} + |A {{newVar}} + |""".stripMargin + evaluate(template, Map.empty).trim shouldBe "A new var" + } + + it should "support to set and use nested variables" in { + val template = + """{% set nested = ({"sub": {"label": "Label"}}) %} + |A {{nested.sub.label}} + |""".stripMargin + lines(evaluate(template, Map.empty)) shouldBe Seq("A Label") + } + + it should "be able to access global functions" in { + val template = """{% for number in range(1, 3) %} + | {{number}} + |{% endfor %}""".stripMargin + lines(evaluate(template, Map.empty)) shouldBe Seq("1", "2") + } + + it should "support sorting value" in { + val template = + """ + | {% for e in entities | sort(false, false, 'order') %} + | {{ e.item }}: {{ e.order }} + | {% endfor %} + | + |""".stripMargin + + val values = Map( + "entities"-> Seq(Map("item" -> "1", "order" -> "2").asJava, Map("item" -> "2", "order" -> "1").asJava).asJava + ) + + val expectedLines = Seq( + "2: 1", + "1: 2" + ) + + lines(evaluateRaw(template, values)) shouldBe expectedLines + } + + private def evaluate(template: String, values: Map[String, Seq[String]]): String = { + val writer = new StringWriter() + val compileTemplate = 
JinjaTemplateEngine().compile(template) + val templateValues = + for((name, value) <- values.toSeq) yield { + new TemplateVariableValue(name, "", value) + } + compileTemplate.evaluate(templateValues, writer) + writer.toString + } + + private def evaluateRaw(template: String, values: Map[String, AnyRef]): String = { + val writer = new StringWriter() + val compileTemplate = JinjaTemplateEngine().compile(template) + compileTemplate.evaluate(values, writer: Writer) + writer.toString + } + + private def lines(str: String): Seq[String] = { + ArraySeq.unsafeWrapArray(str.split("\\s*[\n\r]+\\s*").filter(_.nonEmpty)) + } + +} diff --git a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala new file mode 100644 index 0000000000..fffaee6002 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala @@ -0,0 +1,120 @@ +package org.silkframework.plugins.templating.jinja + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class JinjaVariableCollectorTest extends AnyFlatSpec with Matchers { + + behavior of "JinjaVariableCollector" + + it should "collect plain variable replacements" in { + collect("This is {{name}} from {{city}}.") shouldBe Seq("name", "city") + } + + it should "collect variables in conditions" in { + collect( + """ + | {% if title == "Mayor" %} + | This is the Mayor. + | {% else %} + | This is {{name}}. 
+ | {% endif %} + |""".stripMargin) shouldBe Seq("title", "name") + + collect( + """ + | {% if var1 == "1" %} + | Case 1 + | {% elif var2 == "2" %} + | Case 2 + | {% if var3 == "3" %} + | Case 3 + | {% elif var4 == "4" %} + | Case 4 + | {% endif %} + | {% endif %} + |""".stripMargin) shouldBe Seq("var1", "var2", "var3", "var4") + } + + it should "collect variables inside expressions" in { + val variables = collect("{{names | removeDuplicates | concatMultiValues(', ')}}") + variables shouldBe Seq("names") + } + + it should "collect variables inside do statements" in { + val variables = collect("{% do name %}") + variables shouldBe Seq("name") + } + + it should "collect variables in for-loops" in { + collect( + """ + | {% for user in users %} + | This is {{user}} at index {{loop.index}}. + | {% endfor %} + |""".stripMargin) shouldBe Seq("users") + + collect( + """ + | {% for user in users %} + | {{ inputs | getValueByIndex(loop.index) }} + | {% endfor %} + |""".stripMargin) shouldBe Seq("users", "inputs") + } + + it should "collect variables in set expressions" in { + collect( + """ + | {% set location, country, timestamp = city, 'Germany', time %} + | {{user}} is from {{location}}, {{country}} ({{timestamp}}) + | + |""".stripMargin) shouldBe Seq("city", "time", "user") + collect( + """ + | {% for user in users %} + | {% set location, country, timestamp = city, 'Germany', time %} + | {{user}} is from {{location}}, {{country}} ({{timestamp}}) + | {% endfor %} + | + |""".stripMargin) shouldBe Seq("users", "city", "time") + } + + it should "collect variables in tests" in { + collect( + """ + | {% if title is defined %} + | Is defined + | {% endif %} + |""".stripMargin) shouldBe Seq("title") + } + + it should "collect scoped variables in simple expressions" in { + collect("This is {{project.name}} from {{global.city}}.") shouldBe Seq("project.name", "global.city") + } + + it should "don't fail on empty expressions" in { + collect("{{ }}".stripMargin) shouldBe Seq.empty + 
} + + it should "don't collect bound variables in macros" in { + collect( + """ {% macro foo(name) %} + | Hello {{name}} + | {% endmacro %} + | {{ foo('John') }}""".stripMargin) shouldBe Seq() + } + + it should "collect unbound variables in macros" in { + collect( + """ {% macro foo(street, number) %} + | {{street}} {{number}}, {{country}} + | {% endmacro %} + | {{ foo('Hainstraße', '8') }}""".stripMargin) shouldBe Seq("country") + } + + private def collect(template: String): Seq[String] = { + val node = JinjaTemplateEngine().compile(template).node + new JinjaVariableCollector().collect(node).unboundVars.map(_.scopedName) + } + +} From f6854a680567aeea747c77b0edc0ba79060bb0c5 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Fri, 20 Mar 2026 19:11:28 +0100 Subject: [PATCH 07/63] Move SPARQL specific logic in SparqlVelocityTemplateEngine to SparqlTemplate and make it a generic template engine. --- .../plugins/dataset/rdf/RdfPlugins.scala | 4 +- .../rdf/tasks/templating/SparqlTemplate.scala | 43 ++++++--- .../SparqlTemplatingEngineVelocity.scala | 92 +++++++++---------- .../rdf/LocalSparqlUpdateExecutorTest.scala | 6 +- .../SparqlTemplatingEngineVelocityTest.scala | 8 +- 5 files changed, 78 insertions(+), 75 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala index 5d0f32aa67..c94e3682e4 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala @@ -3,7 +3,7 @@ package org.silkframework.plugins.dataset.rdf import org.silkframework.plugins.dataset.rdf.datasets.{AlignmentDataset, InMemoryDataset, RdfFileDataset, SparqlDataset} import org.silkframework.plugins.dataset.rdf.executors.{LocalSparqlCopyExecutor, LocalSparqlSelectExecutor, 
LocalSparqlUpdateExecutor} import org.silkframework.plugins.dataset.rdf.tasks.{SparqlCopyCustomTask, SparqlSelectCustomTask, SparqlUpdateCustomTask} -import org.silkframework.plugins.dataset.rdf.tasks.templating.{SparqlSimpleTemplateEngine, SparqlVelocityTemplateEngine} +import org.silkframework.plugins.dataset.rdf.tasks.templating.{SparqlSimpleTemplateEngine, VelocityTemplateEngine} import org.silkframework.plugins.dataset.rdf.vocab.{InMemoryVocabularyManager, RdfFilesVocabularyManager, RdfProjectFilesVocabularyManager, RdfVocabularyManager} import org.silkframework.runtime.plugin.{AnyPlugin, PluginModule} @@ -23,7 +23,7 @@ class RdfPlugins extends PluginModule { classOf[SparqlCopyCustomTask], classOf[SparqlUpdateCustomTask], classOf[SparqlSimpleTemplateEngine], - classOf[SparqlVelocityTemplateEngine] + classOf[VelocityTemplateEngine] ) ++ executors val executors = Seq( diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index ba3eb3ca17..efa5983feb 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -4,11 +4,10 @@ import org.apache.jena.update.UpdateFactory import org.silkframework.entity.EntitySchema import org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable -import org.silkframework.runtime.templating.CompiledTemplate +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateVariableName} import org.silkframework.runtime.validation.ValidationException import java.io.StringWriter -import scala.jdk.CollectionConverters.{MapHasAsJava, MapHasAsScala} import scala.util.{Failure, Success, Try} 
/** @@ -18,23 +17,24 @@ class SparqlTemplate(template: CompiledTemplate) { /** Renders the template based on the variable assignments. */ def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { - val values = new java.util.LinkedHashMap[String, AnyRef]() - placeholderAssignments.foreach { case (k, v) => values.put(k, v) } - if (taskProperties.inputTask.nonEmpty) { - values.put("inputProperties", taskProperties.inputTask.asJava) - } - if (taskProperties.outputTask.nonEmpty) { - values.put("outputProperties", taskProperties.outputTask.asJava) - } + val values = scala.collection.mutable.LinkedHashMap[String, AnyRef]() + // Flat entity values (used by simple template engine) + placeholderAssignments.foreach { case (k, v) => values(k) = v } + // SPARQL context objects (used by Velocity engine) + values(SparqlVelocityTemplating.ROW_VAR_NAME) = Row(placeholderAssignments) + values(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME) = InputProperties(taskProperties.inputTask) + values(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME) = OutputProperties(taskProperties.outputTask) val writer = new StringWriter() - template.evaluate(values.asScala.toMap, writer) + template.evaluate(values.toMap, writer) writer.toString } /** Validates the template, including batch validation if batchSize > 1. */ def validate(batchSize: Int): Unit = { template match { - case compiled: SparqlVelocityCompiledTemplate if compiled.usesRawUnsafe() => + case compiled: VelocityCompiledTemplate + if SparqlVelocityTemplating.templatingVariables.exists(varName => + compiled.variableMethodUsages(varName).exists(_.rowMethod == "rawUnsafe")) => // We cannot generate meaningful example values for the template if $row.rawUnsafe() is used, because it could generate arbitrary SPARQL syntax. case _ => // Generate example input assignments @@ -78,15 +78,28 @@ class SparqlTemplate(template: CompiledTemplate) { /** True if the given template is static, i.e. 
contains no placeholder variables. */ def isStaticTemplate: Boolean = { - template.variables match { + sparqlVariables match { case Some(vars) => vars.isEmpty case None => false } } + /** Returns SPARQL-specific variables, extracting paths from method usages for Velocity templates. */ + private lazy val sparqlVariables: Option[Seq[TemplateVariableName]] = template match { + case compiled: VelocityCompiledTemplate => + val rowVars = compiled.variableMethodUsages(SparqlVelocityTemplating.ROW_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, "")) + val inputPropVars = compiled.variableMethodUsages(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, "inputProperties")) + val outputPropVars = compiled.variableMethodUsages(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, "outputProperties")) + Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) + case _ => template.variables + } + /** Returns entity variable names (those with empty scope). */ private def entityVariableNames: Seq[String] = { - template.variables match { + sparqlVariables match { case Some(vars) => vars.filter(_.scope.isEmpty).map(_.name).distinct case None => @@ -96,7 +109,7 @@ class SparqlTemplate(template: CompiledTemplate) { /** Returns variable names for a specific scope (e.g. "inputProperties", "outputProperties"). 
*/ private def taskPropertyVariableNames(scope: String): Seq[String] = { - template.variables match { + sparqlVariables match { case Some(vars) => vars.filter(_.scope == scope).map(_.name).distinct case None => diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala index b4491a312f..18a6a97e1b 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala @@ -1,107 +1,97 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating +import org.apache.velocity.VelocityContext import org.apache.velocity.runtime.parser.node._ import org.silkframework.runtime.plugin.annotations.Plugin import org.silkframework.runtime.templating._ import java.io.Writer -import scala.jdk.CollectionConverters.MapHasAsScala /** - * A SPARQL Update templating engine based on Velocity. + * A general-purpose templating engine based on Apache Velocity. */ @Plugin( - id = SparqlVelocityTemplateEngine.id, - label = "SPARQL Velocity", - description = "A SPARQL Update templating engine based on Apache Velocity." + id = VelocityTemplateEngine.id, + label = "Velocity Engine", + description = "A templating engine based on Apache Velocity." 
) -case class SparqlVelocityTemplateEngine() extends TemplateEngine { +case class VelocityTemplateEngine() extends TemplateEngine { - override def compile(templateString: String): SparqlVelocityCompiledTemplate = { - new SparqlVelocityCompiledTemplate(templateString) + override def compile(templateString: String): VelocityCompiledTemplate = { + new VelocityCompiledTemplate(templateString) } } -object SparqlVelocityTemplateEngine { +object VelocityTemplateEngine { final val id = "velocity" } /** - * A compiled SPARQL Update template based on Velocity. + * A compiled template based on Apache Velocity. */ -class SparqlVelocityCompiledTemplate(val sparqlUpdateTemplate: String) extends CompiledTemplate { +class VelocityCompiledTemplate(val templateString: String) extends CompiledTemplate { - private val sparqlTemplate = SparqlVelocityTemplating.createTemplate(sparqlUpdateTemplate) + private val velocityTemplate = SparqlVelocityTemplating.createTemplate(templateString) override lazy val variables: Option[Seq[TemplateVariableName]] = { - val rowVars = variableMethodUsages(SparqlVelocityTemplating.ROW_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue, "")) - val inputPropVars = variableMethodUsages(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue, "inputProperties")) - val outputPropVars = variableMethodUsages(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue, "outputProperties")) - Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) + Some(extractVariableReferences(velocityTemplate.getData.asInstanceOf[SimpleNode]) + .map(name => new TemplateVariableName(name, "")).distinct) } override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { - // Separate entity variables from task property variables - val entityVars = scala.collection.mutable.Map[String, String]() - var inputProps = Map.empty[String, String] - var outputProps 
= Map.empty[String, String] - values.foreach { - case ("inputProperties", m: java.util.Map[_, _]) => - inputProps = m.asScala.map { case (k, v) => String.valueOf(k) -> String.valueOf(v) }.toMap - case ("outputProperties", m: java.util.Map[_, _]) => - outputProps = m.asScala.map { case (k, v) => String.valueOf(k) -> String.valueOf(v) }.toMap - case (k, v) => - entityVars(k) = String.valueOf(v) - } - writer.write(SparqlVelocityTemplating.renderTemplate(sparqlTemplate, Row(entityVars.toMap), TaskProperties(inputProps, outputProps))) + val context = new VelocityContext() + values.foreach { case (k, v) => context.put(k, v) } + writer.write(SparqlVelocityTemplating.renderTemplate(velocityTemplate, context)) } override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { evaluate(convertValues(values), writer) } - // Extracts all method invocations on the given variable name in the config + /** Extracts top-level variable references from the Velocity AST. 
*/ + private def extractVariableReferences(node: Node): List[String] = { + node match { + case ref: ASTReference => + List(ref.getRootString) + case other: SimpleNode => + (0 until other.jjtGetNumChildren()).flatMap(idx => extractVariableReferences(other.jjtGetChild(idx))).toList + case _ => + List.empty + } + } + + // Extracts all method invocations on the given variable name in the template AST private[templating] def variableMethodUsages(variableName: String): Seq[TemplateVariableMethodUsage] = { - sparqlTemplate.getData match { + velocityTemplate.getData match { case simpleNode: SimpleNode => - retrieveRowMethodUsages(simpleNode, variableName) + retrieveMethodUsages(simpleNode, variableName) case None => throw new RuntimeException(s"Unexpected error: Cannot retrieve $variableName object method usages from Velocity template.") } } - def usesRawUnsafe(): Boolean = { - SparqlVelocityTemplating.templatingVariables.exists { variableName => - variableMethodUsages(variableName).exists(_.rowMethod == rawUnsafeMethodName) - } - } - - private val rawUnsafeMethodName = "rawUnsafe" + private final val methodsWithStringParameter = Set("uri", "plainLiteral", "rawUnsafe", "exists") - private final val rowMethodsWithPathParameter = Set("uri", "plainLiteral", rawUnsafeMethodName, "exists") - /** Retrieves the input paths that are used via the [[Row]] API. */ - private def retrieveRowMethodUsages(simpleNode: Node, varName: String): List[TemplateVariableMethodUsage] = { + /** Retrieves method usages on a given variable from the AST. 
*/ + private def retrieveMethodUsages(simpleNode: Node, varName: String): List[TemplateVariableMethodUsage] = { simpleNode match { case astMethod: ASTMethod => astReferenceName(astMethod.jjtGetParent()) match { case Some(v) if v == varName && - rowMethodsWithPathParameter.contains(astMethod.getMethodName) && - validStringRowMethodParameter(astMethod) => + methodsWithStringParameter.contains(astMethod.getMethodName) && + validStringMethodParameter(astMethod) => val parameterValue = astMethod.jjtGetChild(1).jjtGetChild(0).asInstanceOf[ASTStringLiteral].literal().stripPrefix("\"").stripSuffix("\"") List(TemplateVariableMethodUsage(astMethod.getMethodName, parameterValue)) case _ => List.empty } case other: SimpleNode => - retrieveChildRowMethodUsages(other, varName) + retrieveChildMethodUsages(other, varName) } } // Make sure that there is a single string constant as parameter - private def validStringRowMethodParameter(astMethod: ASTMethod): Boolean = { + private def validStringMethodParameter(astMethod: ASTMethod): Boolean = { astMethod.jjtGetNumChildren() == 2 && { val parameter = astMethod.jjtGetChild(1) parameter.isInstanceOf[ASTExpression] && @@ -120,9 +110,9 @@ class SparqlVelocityCompiledTemplate(val sparqlUpdateTemplate: String) extends C } } - private def retrieveChildRowMethodUsages(other: SimpleNode, varName: String): List[TemplateVariableMethodUsage] = { + private def retrieveChildMethodUsages(other: SimpleNode, varName: String): List[TemplateVariableMethodUsage] = { val childPaths = for (idx <- 0 until other.jjtGetNumChildren()) yield { - retrieveRowMethodUsages(other.jjtGetChild(idx), varName) + retrieveMethodUsages(other.jjtGetChild(idx), varName) } childPaths.fold(List.empty[TemplateVariableMethodUsage])((a, b) => a ::: b) } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala 
b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala index 6f4389d9d7..c3eb4c902f 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala @@ -8,7 +8,7 @@ import org.silkframework.execution.{ExecutionReport, ExecutorOutput} import org.silkframework.execution.local.{GenericEntityTable, LocalEntities, LocalExecution} import org.silkframework.plugins.dataset.rdf.executors.LocalSparqlUpdateExecutor import org.silkframework.plugins.dataset.rdf.tasks.SparqlUpdateCustomTask -import org.silkframework.plugins.dataset.rdf.tasks.templating.{SparqlSimpleTemplateEngine, SparqlVelocityTemplateEngine} +import org.silkframework.plugins.dataset.rdf.tasks.templating.{SparqlSimpleTemplateEngine, VelocityTemplateEngine} import org.silkframework.runtime.activity.{ActivityContext, UserContext} import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, TestPluginContext} import org.silkframework.runtime.validation.ValidationException @@ -96,7 +96,7 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestW it should "output one UPDATE query per input task when the template contains input property placeholders" in { val templateWithInputPropertyPlaceholders = """INSERT DATA { $inputProperties.uri("graph") $inputProperties.plainLiteral("graph") };""" val result = executeTask(templateWithInputPropertyPlaceholders, Seq(mockInputTable(Seq("graph" -> "g1")), - mockInputTable(Seq("graph" -> "g2"))), SparqlVelocityTemplateEngine.id) + mockInputTable(Seq("graph" -> "g2"))), VelocityTemplateEngine.id) result.entities.map(_.values.flatten.head).toList mustBe List("INSERT DATA { \"g1\" };\n" + "INSERT DATA { \"g2\" };") } @@ -104,7 +104,7 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec 
with Matchers with TestW it should "output one UPDATE query overall even for multiple inputs when no placeholder is used at all" in { val staticTemplate = """INSERT DATA { "1" };""" val result = executeTask(staticTemplate, Seq(mockInputTable(Seq("graph" -> "g1")), - mockInputTable(Seq("graph" -> "g2"))), SparqlVelocityTemplateEngine.id) + mockInputTable(Seq("graph" -> "g2"))), VelocityTemplateEngine.id) result.entities.map(_.values.flatten.head).toList mustBe List(staticTemplate) } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala index 262d03628c..11e57d572b 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala @@ -24,8 +24,8 @@ class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { | Raw: $row.rawUnsafe("trustedValuePath") |#end |""".stripMargin - val compiled = SparqlVelocityTemplateEngine().compile(templateString) - compiled.variables.get.filter(_.scope.isEmpty).map(_.name).sorted mustBe Seq("somePath", "subject", "trustedValuePath") + val compiled = VelocityTemplateEngine().compile(templateString) + compiled.variables.get.map(_.name).sorted mustBe Seq("row") } private val templateWithLogic = s"""PREFIX xsd: <${XSD.getURI}> @@ -39,7 +39,7 @@ class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { it should "validate without problems for valid templates" in { validate(sparqlUpdateTemplate) - SparqlVelocityTemplateEngine().compile(templateWithLogic).variables.get.filter(_.scope.isEmpty).map(_.name).sorted mustBe Seq("input1", "input2") 
+ VelocityTemplateEngine().compile(templateWithLogic).variables.get.map(_.name).sorted mustBe Seq("row") validate(templateWithLogic) } @@ -73,6 +73,6 @@ class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { } def validate(template: String, batchSize: Int = 2): Unit = { - new SparqlTemplate(SparqlVelocityTemplateEngine().compile(template)).validate(batchSize) + new SparqlTemplate(VelocityTemplateEngine().compile(template)).validate(batchSize) } } From 246ba117f8bd5b05bff53030b89d4f4d803e60ed Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 10:35:01 +0100 Subject: [PATCH 08/63] Rename VelocityTemplateEngine scala file --- ...emplatingEngineVelocity.scala => VelocityTemplateEngine.scala} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/{SparqlTemplatingEngineVelocity.scala => VelocityTemplateEngine.scala} (100%) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala similarity index 100% rename from silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala rename to silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala From 0f7bd4c5ded21e41da3aa22da4aa347c6d7f495a Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 10:46:35 +0100 Subject: [PATCH 09/63] Moved SPARQL specific code out of VelocityTemplateEngine. 
Added generic methodUsages to TemplateEngine --- .../runtime/templating/TemplateEngine.scala | 18 +++- .../rdf/tasks/templating/SparqlTemplate.scala | 90 +++++++++++-------- .../templating/VelocityTemplateEngine.scala | 25 ++---- 3 files changed, 78 insertions(+), 55 deletions(-) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala index 0d6c20185f..ce6566b986 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala @@ -30,6 +30,14 @@ trait CompiledTemplate { */ def variables: Option[Seq[TemplateVariableName]] = None + /** + * Returns all method usages on a given variable in the template. + * Each usage contains the method name and its string parameter value. + * Only methods with a single string constant parameter are returned. + * Returns an empty sequence by default if not supported by the template engine. + */ + def methodUsages(variableName: String): Seq[TemplateMethodUsage] = Seq.empty + /** * Evaluates this template using a map of variable values. */ @@ -85,4 +93,12 @@ trait CompiledTemplate { * @param ignoreUnboundVariables If an unbound variable is found then instead of throwing an error the variable evaluates * to the variable name itself. */ -case class EvaluationConfig(ignoreUnboundVariables: Boolean = false) \ No newline at end of file +case class EvaluationConfig(ignoreUnboundVariables: Boolean = false) + +/** + * Represents a method invocation on a template variable with a single string parameter. + * + * @param methodName The name of the invoked method. + * @param parameterValue The string constant passed as parameter. 
+ */ +case class TemplateMethodUsage(methodName: String, parameterValue: String) \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index efa5983feb..ee8ac4aa99 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -4,7 +4,7 @@ import org.apache.jena.update.UpdateFactory import org.silkframework.entity.EntitySchema import org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable -import org.silkframework.runtime.templating.{CompiledTemplate, TemplateVariableName} +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateMethodUsage, TemplateVariableName} import org.silkframework.runtime.validation.ValidationException import java.io.StringWriter @@ -31,38 +31,35 @@ class SparqlTemplate(template: CompiledTemplate) { /** Validates the template, including batch validation if batchSize > 1. */ def validate(batchSize: Int): Unit = { - template match { - case compiled: VelocityCompiledTemplate - if SparqlVelocityTemplating.templatingVariables.exists(varName => - compiled.variableMethodUsages(varName).exists(_.rowMethod == "rawUnsafe")) => - // We cannot generate meaningful example values for the template if $row.rawUnsafe() is used, because it could generate arbitrary SPARQL syntax. 
- case _ => - // Generate example input assignments - val genericUri = "urn:generic:1" - val entityVariables = entityVariableNames - val assignments = entityVariables.map(_ -> genericUri).toMap - val inputPropVars = taskPropertyVariableNames("inputProperties").map(_ -> genericUri).toMap - val outputPropVars = taskPropertyVariableNames("outputProperties").map(_ -> genericUri).toMap - val taskProps = TaskProperties(inputPropVars, outputPropVars) - val sparqlQuery = Try(generate(assignments, taskProps)) match { - case Failure(exception) => - throw new ValidationException( - "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) - case Success(value) => value - } - Try(UpdateFactory.create(sparqlQuery)).failed.toOption.foreach { parseError => + if (usesRawUnsafe) { + // We cannot generate meaningful example values for the template if $row.rawUnsafe() is used, because it could generate arbitrary SPARQL syntax. + } else { + // Generate example input assignments + val genericUri = "urn:generic:1" + val entityVariables = entityVariableNames + val assignments = entityVariables.map(_ -> genericUri).toMap + val inputPropVars = taskPropertyVariableNames("inputProperties").map(_ -> genericUri).toMap + val outputPropVars = taskPropertyVariableNames("outputProperties").map(_ -> genericUri).toMap + val taskProps = TaskProperties(inputPropVars, outputPropVars) + val sparqlQuery = Try(generate(assignments, taskProps)) match { + case Failure(exception) => throw new ValidationException( - "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + - parseError.getMessage + ", example query: " + sparqlQuery) - } - if (batchSize > 1) { - val batchSparql = sparqlQuery + "\n" + sparqlQuery - Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => - throw new ValidationException( - "The SPARQL Update template cannot be batched processed. 
There is probably a ';' missing at the end. Error message: " + - parseError.getMessage + ", example batch query: " + batchSparql) - } + "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) + case Success(value) => value + } + Try(UpdateFactory.create(sparqlQuery)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + + parseError.getMessage + ", example query: " + sparqlQuery) + } + if (batchSize > 1) { + val batchSparql = sparqlQuery + "\n" + sparqlQuery + Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + + parseError.getMessage + ", example batch query: " + batchSparql) } + } } } @@ -84,17 +81,34 @@ class SparqlTemplate(template: CompiledTemplate) { } } - /** Returns SPARQL-specific variables, extracting paths from method usages for Velocity templates. */ - private lazy val sparqlVariables: Option[Seq[TemplateVariableName]] = template match { - case compiled: VelocityCompiledTemplate => - val rowVars = compiled.variableMethodUsages(SparqlVelocityTemplating.ROW_VAR_NAME) + /** SPARQL-specific method names that accept a string parameter representing an input path. */ + private val sparqlMethodNames = Set("uri", "plainLiteral", "rawUnsafe", "exists") + + /** Returns SPARQL-specific variables, extracting paths from method usages. 
*/ + private lazy val sparqlVariables: Option[Seq[TemplateVariableName]] = { + val usages = SparqlVelocityTemplating.templatingVariables.flatMap(v => template.methodUsages(v)) + if (usages.nonEmpty) { + val rowVars = sparqlMethodUsages(SparqlVelocityTemplating.ROW_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue, "")) - val inputPropVars = compiled.variableMethodUsages(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME) + val inputPropVars = sparqlMethodUsages(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue, "inputProperties")) - val outputPropVars = compiled.variableMethodUsages(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME) + val outputPropVars = sparqlMethodUsages(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue, "outputProperties")) Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) - case _ => template.variables + } else { + template.variables + } + } + + /** Returns method usages on the given variable filtered to SPARQL-specific methods. */ + private def sparqlMethodUsages(variableName: String): Seq[TemplateMethodUsage] = { + template.methodUsages(variableName).filter(u => sparqlMethodNames.contains(u.methodName)) + } + + /** Checks if any SPARQL templating variable uses the rawUnsafe method. */ + private lazy val usesRawUnsafe: Boolean = { + SparqlVelocityTemplating.templatingVariables.exists(varName => + sparqlMethodUsages(varName).exists(_.methodName == "rawUnsafe")) } /** Returns entity variable names (those with empty scope). 
*/ diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala index 18a6a97e1b..98971f1631 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala @@ -3,7 +3,7 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating import org.apache.velocity.VelocityContext import org.apache.velocity.runtime.parser.node._ import org.silkframework.runtime.plugin.annotations.Plugin -import org.silkframework.runtime.templating._ +import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateMethodUsage, TemplateVariableName, TemplateVariableValue} import java.io.Writer @@ -60,8 +60,7 @@ class VelocityCompiledTemplate(val templateString: String) extends CompiledTempl } } - // Extracts all method invocations on the given variable name in the template AST - private[templating] def variableMethodUsages(variableName: String): Seq[TemplateVariableMethodUsage] = { + override def methodUsages(variableName: String): Seq[TemplateMethodUsage] = { velocityTemplate.getData match { case simpleNode: SimpleNode => retrieveMethodUsages(simpleNode, variableName) @@ -70,18 +69,14 @@ class VelocityCompiledTemplate(val templateString: String) extends CompiledTempl } } - private final val methodsWithStringParameter = Set("uri", "plainLiteral", "rawUnsafe", "exists") - /** Retrieves method usages on a given variable from the AST. 
*/ - private def retrieveMethodUsages(simpleNode: Node, varName: String): List[TemplateVariableMethodUsage] = { + private def retrieveMethodUsages(simpleNode: Node, varName: String): List[TemplateMethodUsage] = { simpleNode match { case astMethod: ASTMethod => astReferenceName(astMethod.jjtGetParent()) match { - case Some(v) if v == varName && - methodsWithStringParameter.contains(astMethod.getMethodName) && - validStringMethodParameter(astMethod) => + case Some(v) if v == varName && hasSingleStringParameter(astMethod) => val parameterValue = astMethod.jjtGetChild(1).jjtGetChild(0).asInstanceOf[ASTStringLiteral].literal().stripPrefix("\"").stripSuffix("\"") - List(TemplateVariableMethodUsage(astMethod.getMethodName, parameterValue)) + List(TemplateMethodUsage(astMethod.getMethodName, parameterValue)) case _ => List.empty } @@ -90,8 +85,8 @@ class VelocityCompiledTemplate(val templateString: String) extends CompiledTempl } } - // Make sure that there is a single string constant as parameter - private def validStringMethodParameter(astMethod: ASTMethod): Boolean = { + /** Checks that there is a single string constant as parameter. 
*/ + private def hasSingleStringParameter(astMethod: ASTMethod): Boolean = { astMethod.jjtGetNumChildren() == 2 && { val parameter = astMethod.jjtGetChild(1) parameter.isInstanceOf[ASTExpression] && @@ -110,12 +105,10 @@ class VelocityCompiledTemplate(val templateString: String) extends CompiledTempl } } - private def retrieveChildMethodUsages(other: SimpleNode, varName: String): List[TemplateVariableMethodUsage] = { + private def retrieveChildMethodUsages(other: SimpleNode, varName: String): List[TemplateMethodUsage] = { val childPaths = for (idx <- 0 until other.jjtGetNumChildren()) yield { retrieveMethodUsages(other.jjtGetChild(idx), varName) } - childPaths.fold(List.empty[TemplateVariableMethodUsage])((a, b) => a ::: b) + childPaths.fold(List.empty[TemplateMethodUsage])((a, b) => a ::: b) } } - -case class TemplateVariableMethodUsage(rowMethod: String, parameterValue: String) From 28a6ee859dea64e408ce23ba0aa1d1d7637ab21f Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 10:58:05 +0100 Subject: [PATCH 10/63] Improved some labels for the templating code --- .../rdf/tasks/templating/VelocityTemplateEngine.scala | 2 +- ...neVelocityTest.scala => SparqlTemplateVelocityTest.scala} | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) rename silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/{SparqlTemplatingEngineVelocityTest.scala => SparqlTemplateVelocityTest.scala} (96%) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala index 98971f1631..2c47f8efd3 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala +++ 
b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala @@ -12,7 +12,7 @@ import java.io.Writer */ @Plugin( id = VelocityTemplateEngine.id, - label = "Velocity Engine", + label = "Velocity", description = "A templating engine based on Apache Velocity." ) case class VelocityTemplateEngine() extends TemplateEngine { diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala similarity index 96% rename from silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala rename to silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala index 11e57d572b..3c98e570e7 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala @@ -6,8 +6,9 @@ import org.silkframework.runtime.validation.ValidationException import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers -class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { - behavior of "Velocity SPARQL Templating Engine" +class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { + + behavior of "SPARQL templating with the Velocity Template Engine" private val sparqlUpdateTemplate = s"""PREFIX rdf: From 1ed570b5d99fb87794fcd1173bcd9c0ba2985f26 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 12:32:23 +0100 Subject: [PATCH 11/63] Move UnboundVariablesException to templating 
package. Make it possible for RequestExceptions to provide additional data for the response. --- build.sbt | 4 ++-- .../exceptions/UnboundVariablesException.scala | 17 ++++++----------- .../runtime/validation/RequestException.scala | 5 +++++ .../SparqlTemplateVelocityTest.scala | 18 +++++++++--------- .../templating/jinja/JinjaTemplateEngine.scala | 3 ++- .../workbench/utils/ErrorResult.scala | 6 ++++-- 6 files changed, 28 insertions(+), 25 deletions(-) rename {silk-workbench/silk-workbench-core/app => silk-core/src/main/scala}/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala (54%) diff --git a/build.sbt b/build.sbt index afcf59a8ba..e9b5914e27 100644 --- a/build.sbt +++ b/build.sbt @@ -148,7 +148,7 @@ lazy val workspace = (project in file("silk-workspace")) ////////////////////////////////////////////////////////////////////////////// lazy val pluginsTemplatingJinja = (project in file("silk-plugins/silk-plugins-templating-jinja")) - .dependsOn(rules % "compile->compile;test->test", workbenchCore) + .dependsOn(rules % "compile->compile;test->test") .settings(commonSettings *) .settings( name := "Silk Plugins Templating Jinja", @@ -156,7 +156,7 @@ lazy val pluginsTemplatingJinja = (project in file("silk-plugins/silk-plugins-te ) lazy val pluginsRdf = (project in file("silk-plugins/silk-plugins-rdf")) - .dependsOn(rules, workspace % "test->test;compile->compile", core % "test->test;compile->compile", pluginsCsv % "test->compile") + .dependsOn(rules, workspace % "test->test;compile->compile", core % "test->test;compile->compile", pluginsCsv % "test->compile", pluginsTemplatingJinja % "test->compile") .settings(commonSettings *) .settings( name := "Silk Plugins RDF", diff --git a/silk-workbench/silk-workbench-core/app/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala similarity index 54% rename from 
silk-workbench/silk-workbench-core/app/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala rename to silk-core/src/main/scala/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala index 645bd258a3..3a2e6c5db2 100644 --- a/silk-workbench/silk-workbench-core/app/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala @@ -1,29 +1,24 @@ package org.silkframework.runtime.templating.exceptions import org.silkframework.runtime.templating.TemplateVariableName -import org.silkframework.runtime.templating.exceptions.UnboundVariablesException.generateMessage -import org.silkframework.workbench.utils.JsonRequestException -import play.api.libs.json.{JsObject, Json} /** * Thrown if a value for an unbound variable is missing. */ class UnboundVariablesException(val missingVars: Seq[TemplateVariableName], cause: Option[Exception] = None) - extends TemplateEvaluationException(generateMessage(missingVars), cause) with JsonRequestException { + extends TemplateEvaluationException(UnboundVariablesException.generateMessage(missingVars), cause) { /** * A short description of the error type. */ override def errorTitle: String = "Unbound variables" + /** - * Json that will be included in addition to the HTTP Problem details JSON. - * Note that using reserved HTTP Problem details fields (type, title, detail) would overwrite the generated ones. - */ - override def additionalJson: JsObject = { - Json.obj( - "unboundVariables" -> missingVars.map(_.scopedName) - ) + * Include the unbound variables in the HTTP Problem details JSON. 
+ */ + override def additionalData: Map[String, Seq[String]] = { + Map("unboundVariables" -> missingVars.map(_.scopedName)) } } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/validation/RequestException.scala b/silk-core/src/main/scala/org/silkframework/runtime/validation/RequestException.scala index 15c9ac3039..e432381a7b 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/validation/RequestException.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/validation/RequestException.scala @@ -21,4 +21,9 @@ abstract class RequestException(msg: String, cause: Option[Throwable]) extends R */ def httpErrorCode: Option[Int] + /** + * Additional key-value pairs that should be included in the error response. + */ + def additionalData: Map[String, Seq[String]] = Map.empty + } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala index 3c98e570e7..0ee6f2f80e 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala @@ -29,16 +29,16 @@ class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { compiled.variables.get.map(_.name).sorted mustBe Seq("row") } - private val templateWithLogic = s"""PREFIX xsd: <${XSD.getURI}> - |INSERT DATA { - | "entity 1" . - | #if ($$row.exists("input1")) - | $$row.uri("input1") $$row.plainLiteral("input2")^^xsd:string - | #end - |}; - |""".stripMargin - it should "validate without problems for valid templates" in { + val templateWithLogic = s"""PREFIX xsd: <${XSD.getURI}> + |INSERT DATA { + | "entity 1" . 
+ | #if ($$row.exists("input1")) + | $$row.uri("input1") $$row.plainLiteral("input2")^^xsd:string + | #end + |}; + |""".stripMargin + validate(sparqlUpdateTemplate) VelocityTemplateEngine().compile(templateWithLogic).variables.get.map(_.name).sorted mustBe Seq("row") validate(templateWithLogic) diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala index 91e11ab0ab..22dc4ffaed 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala @@ -75,7 +75,8 @@ object JinjaTemplateEngine { class JinjaTemplate(val node: Node) extends CompiledTemplate { override val variables: Option[Seq[TemplateVariableName]] = { - Some(new JinjaVariableCollector().collect(node).unboundVars) + val result = new JinjaVariableCollector().collect(node) + Some(result.unboundVars) } override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig = EvaluationConfig()): Unit = { diff --git a/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala b/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala index cd82d2f811..8d1a680bf8 100644 --- a/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala +++ b/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala @@ -66,8 +66,10 @@ object ErrorResult { ex match { case requestEx: RequestException with JsonRequestException => requestEx.additionalJson - case _ => - Json.obj() + case requestEx: RequestException => + JsObject( + 
requestEx.additionalData.map(data => data._1 -> JsString(data._2.toString)).toSeq + ) } } From 4c761cc53aafa27fb572df5b9df056932bf3c75a Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 12:51:28 +0100 Subject: [PATCH 12/63] Add SparqlTemplateJinjaTest and fix found bug --- .../templating/SparqlTemplateJinjaTest.scala | 64 +++++++++++++++++++ .../jinja/JinjaVariableCollector.scala | 17 ++++- .../jinja/JinjaVariableCollectorTest.scala | 11 ++++ 3 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala new file mode 100644 index 0000000000..e7507637ba --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala @@ -0,0 +1,64 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.apache.jena.vocabulary.XSD +import org.silkframework.plugins.templating.jinja.JinjaTemplateEngine +import org.silkframework.runtime.validation.ValidationException +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.must.Matchers + +class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { + + behavior of "SPARQL templating with the Jinja Template Engine" + + it should "output the correct input paths of the template" in { + val templateString = + """ + |{{ row.uri("subject") }} + |{% if row.exists("somePath") %} + | Plain: {{ row.plainLiteral("somePath") }} + | Raw: {{ row.rawUnsafe("trustedValuePath") }} + |{% endif %} + |""".stripMargin + val compiled = JinjaTemplateEngine().compile(templateString) + 
compiled.variables.get.map(_.name).sorted mustBe Seq("row") + } + + it should "validate without problems for valid templates" in { + val templateWithLogic = s"""PREFIX xsd: <${XSD.getURI}> + |INSERT DATA { + | "entity 1" . + | {% if row.exists("input1") %} + | {{ row.uri("input1") }} {{ row.plainLiteral("input2") }}^^xsd:string + | {% endif %} + |}; + |""".stripMargin + + JinjaTemplateEngine().compile(templateWithLogic).variables.get.map(_.name).sorted mustBe Seq("row") + validate(templateWithLogic) + } + + it should "raise a validation error when the template is invalid" in { + intercept[ValidationException] { + validate("""DELETE DATA { unknownPrefix:label "test" } ;""") + } + intercept[ValidationException] { + validate( + """PREFIX foaf: + | + |WITH + |DELETE { ?person ?property ?value } + |WHERE { ?person ?property ?value ; foaf:givenName 'Fred } ;""".stripMargin) // Missing closing ' for literal + } + val batchTemplate = + s"""PREFIX xsd: <${XSD.getURI}> + |INSERT DATA { "hello" } ;""".stripMargin + validate(batchTemplate.dropRight(1), batchSize = 1) // Dropped ';' at the end, not batch supported, but batch size is 1 + intercept[ValidationException] { + validate(batchTemplate.dropRight(1)) // Dropped ';' at the end, not batch supported + } + } + + def validate(template: String, batchSize: Int = 2): Unit = { + new SparqlTemplate(JinjaTemplateEngine().compile(template)).validate(batchSize) + } +} diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala index b96ba155f6..ff1c263898 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala +++ 
b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala @@ -93,12 +93,16 @@ class JinjaVariableCollector { private def collectFromExpression(expression: String): Scope = { try { val tree = builder.build(EXPRESSION_START_TOKEN + expression + EXPRESSION_END_TOKEN) - // Manually treat simple expressions of the form `project.variable` + // Manually treat simple expressions of the form `project.variable` or `variable.method(...)` expression match { case JinjaVariableCollector.scopedName(scope, name) => Scope( unboundVars = Seq(new TemplateVariableName(name, scope)) ) + case JinjaVariableCollector.methodCallOnVar(varName) => + Scope( + unboundVars = Seq(new TemplateVariableName(varName, "")) + ) case _ => Scope( unboundVars = tree.getIdentifierNodes.asScala.map(_.getName).filterNot(ignoreIdentifierNode).toSeq.map(new TemplateVariableName(_, "")) @@ -106,7 +110,13 @@ class JinjaVariableCollector { } } catch { case _: TreeBuilderException => - Scope.empty + // Fallback: try to extract the leading variable from method call expressions like `var.method(...)` + expression match { + case JinjaVariableCollector.methodCallOnVar(varName) => + Scope(unboundVars = Seq(new TemplateVariableName(varName, ""))) + case _ => + Scope.empty + } } } @@ -160,4 +170,7 @@ object JinjaVariableCollector { // Regex for scoped names of the form scope.var private val scopedName = s"($variableRegex)\\.($variableRegex)".r + // Regex for method calls on a variable of the form var.method(...) 
+ private val methodCallOnVar = s"($variableRegex)\\.$variableRegex\\(.*\\)".r + } diff --git a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala index fffaee6002..faf0762bbe 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala @@ -92,6 +92,17 @@ class JinjaVariableCollectorTest extends AnyFlatSpec with Matchers { collect("This is {{project.name}} from {{global.city}}.") shouldBe Seq("project.name", "global.city") } + it should "collect variables used in method calls" in { + collect( + """ + | {{ row.uri("subject") }} + | {% if row.exists("somePath") %} + | Plain: {{ row.plainLiteral("somePath") }} + | Raw: {{ row.rawUnsafe("trustedValuePath") }} + | {% endif %} + |""".stripMargin) shouldBe Seq("row") + } + it should "don't fail on empty expressions" in { collect("{{ }}".stripMargin) shouldBe Seq.empty } From d17b95464a27415c4983695fffb3588119104038 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 13:14:33 +0100 Subject: [PATCH 13/63] Add JinjaMethodCollector with tests and integrate into JinjaTemplate Co-Authored-By: Claude Sonnet 4.6 --- .../jinja/JinjaMethodCollector.scala | 45 +++++++++++++ .../jinja/JinjaTemplateEngine.scala | 6 +- .../jinja/JinjaMethodCollectorTest.scala | 64 +++++++++++++++++++ 3 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollector.scala create mode 100644 
silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollectorTest.scala diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollector.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollector.scala new file mode 100644 index 0000000000..6ed97e45f9 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollector.scala @@ -0,0 +1,45 @@ +package org.silkframework.plugins.templating.jinja + +import com.hubspot.jinjava.tree.{ExpressionNode, Node, TagNode} +import com.hubspot.jinjava.tree.parse.ExpressionToken +import org.silkframework.runtime.templating.TemplateMethodUsage + +import scala.jdk.CollectionConverters.IterableHasAsScala +import scala.util.matching.Regex + +/** + * Collects all method usages on a given variable in a Jinja template. + * Only methods with a single string constant parameter are returned. + */ +class JinjaMethodCollector { + + /** + * Collects all usages of methods called on the given variable name in the template node. 
+ */ + def collect(node: Node, variableName: String): Seq[TemplateMethodUsage] = { + node match { + case tagNode: TagNode => + val fromHelpers = extractMethodUsages(tagNode.getHelpers, variableName) + val fromChildren = tagNode.getChildren.asScala.flatMap(collect(_, variableName)).toSeq + fromHelpers ++ fromChildren + case exprNode: ExpressionNode => + val expr = exprNode.getMaster.asInstanceOf[ExpressionToken].getExpr + extractMethodUsages(expr, variableName) + case _ => + node.getChildren.asScala.flatMap(collect(_, variableName)).toSeq + } + } + + private def extractMethodUsages(expression: String, varName: String): Seq[TemplateMethodUsage] = { + JinjaMethodCollector.methodCallPattern(varName).findAllMatchIn(expression).map { m => + TemplateMethodUsage(m.group(1), m.group(2)) + }.toSeq + } +} + +object JinjaMethodCollector { + + // Matches: varName.methodName("param") or varName.methodName('param') + private def methodCallPattern(varName: String): Regex = + s"""${Regex.quote(varName)}\\.([a-zA-Z_][a-zA-Z0-9_]*)\\(["']([^"']*)["']\\)""".r +} \ No newline at end of file diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala index 22dc4ffaed..15288387d3 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala @@ -5,7 +5,7 @@ import com.hubspot.jinjava.tree.Node import com.hubspot.jinjava.{Jinjava, JinjavaConfig} import org.silkframework.runtime.plugin.annotations.Plugin import org.silkframework.runtime.templating.exceptions.{TemplateEvaluationException, UnboundVariablesException} -import org.silkframework.runtime.templating.{CompiledTemplate, 
EvaluationConfig, TemplateEngine, TemplateVariableName, TemplateVariableValue} +import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateMethodUsage, TemplateVariableName, TemplateVariableValue} import java.io.Writer import java.util.EmptyStackException @@ -79,6 +79,10 @@ class JinjaTemplate(val node: Node) extends CompiledTemplate { Some(result.unboundVars) } + override def methodUsages(variableName: String): Seq[TemplateMethodUsage] = { + new JinjaMethodCollector().collect(node, variableName) + } + override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig = EvaluationConfig()): Unit = { // Check if values for all variables are provided // We do this explicitly because the Jinja-internal checks are not sufficient diff --git a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollectorTest.scala b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollectorTest.scala new file mode 100644 index 0000000000..0a5c5d1ee2 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollectorTest.scala @@ -0,0 +1,64 @@ +package org.silkframework.plugins.templating.jinja + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers +import org.silkframework.runtime.templating.TemplateMethodUsage + +class JinjaMethodCollectorTest extends AnyFlatSpec with Matchers { + + behavior of "JinjaMethodCollector" + + it should "collect a method call in an expression node" in { + collect("""{{ row.uri("subject") }}""", "row") shouldBe Seq(TemplateMethodUsage("uri", "subject")) + } + + it should "collect a method call in an if tag helper" in { + collect("""{% if row.exists("somePath") %}yes{% endif %}""", "row") shouldBe Seq(TemplateMethodUsage("exists", "somePath")) + 
} + + it should "collect multiple method calls across nodes" in { + collect( + """ + |{{ row.uri("subject") }} + |{% if row.exists("somePath") %} + | {{ row.plainLiteral("somePath") }} + | {{ row.rawUnsafe("trustedValuePath") }} + |{% endif %} + |""".stripMargin, "row" + ) shouldBe Seq( + TemplateMethodUsage("uri", "subject"), + TemplateMethodUsage("exists", "somePath"), + TemplateMethodUsage("plainLiteral", "somePath"), + TemplateMethodUsage("rawUnsafe", "trustedValuePath") + ) + } + + it should "only collect methods on the requested variable" in { + collect( + """{{ row.uri("subject") }} {{ other.uri("subject") }}""", + "row" + ) shouldBe Seq(TemplateMethodUsage("uri", "subject")) + } + + it should "collect method calls using single-quoted parameters" in { + collect("""{{ row.uri('subject') }}""", "row") shouldBe Seq(TemplateMethodUsage("uri", "subject")) + } + + it should "return an empty sequence when no methods are called on the variable" in { + collect("""INSERT DATA { "hello" }""", "row") shouldBe Seq.empty + } + + it should "return an empty sequence when the variable is not present" in { + collect("""{{ other.uri("subject") }}""", "row") shouldBe Seq.empty + } + + it should "not collect method calls without a string constant parameter" in { + // row.method(var) — non-constant parameter, should not be collected + collect("""{{ row.uri(subject) }}""", "row") shouldBe Seq.empty + } + + private def collect(template: String, variableName: String): Seq[TemplateMethodUsage] = { + val node = JinjaTemplateEngine().compile(template).node + new JinjaMethodCollector().collect(node, variableName) + } +} \ No newline at end of file From 6e40d803337f3de71f1a951a65694ef98266cc65 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 13:19:13 +0100 Subject: [PATCH 14/63] Add More tests for template engine --- .../templating/SparqlVelocityTemplating.scala | 9 +- .../rdf/sparql/SparqlTemplatingTest.scala | 114 +++++++++--------- 
.../templating/SparqlTemplateJinjaTest.scala | 60 +++++++++ 3 files changed, 122 insertions(+), 61 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlVelocityTemplating.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlVelocityTemplating.scala index bb0e9a334a..470e052dcf 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlVelocityTemplating.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlVelocityTemplating.scala @@ -9,6 +9,7 @@ import org.apache.velocity.exception.MethodInvocationException import org.apache.velocity.runtime.RuntimeSingleton import org.apache.velocity.{Template, VelocityContext} import org.silkframework.rule.util.JenaSerializationUtil +import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException import scala.util.Try @@ -54,7 +55,7 @@ object SparqlVelocityTemplating { } catch { case ex: MethodInvocationException => val adaptedMessage = prettifyExceptionMessage(Option(ex.getMessage).getOrElse("")) - throw TemplateExecutionException("Template could not be rendered. Error detail: " + adaptedMessage, ex) + throw new TemplateEvaluationException("Template could not be rendered. 
Error detail: " + adaptedMessage, Some(ex)) } } @@ -116,7 +117,7 @@ trait TemplateValueAccessApi { def uri(inputPath: String): String = { val value = objectValue(inputPath) if(Try(new URI(value)).isFailure) { - throw TemplateExecutionException(s"Value for input path '$inputPath' is not a valid URI: '$value'") + throw new TemplateEvaluationException(s"Value for input path '$inputPath' is not a valid URI: '$value'") } val uriNode = NodeFactory.createURI(value) JenaSerializationUtil.serializeSingleNode(uriNode) @@ -132,7 +133,7 @@ trait TemplateValueAccessApi { case Some(value) => value case None => - throw TemplateExecutionException(s"Input path '$inputPath' did not exist in $$$templateVarName.") + throw new TemplateEvaluationException(s"Input path '$inputPath' did not exist in $$$templateVarName.") } } @@ -149,5 +150,3 @@ trait TemplateValueAccessApi { objectValue(inputPath) } } - -case class TemplateExecutionException(message: String, cause: Throwable = null) extends RuntimeException(message, cause) \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/sparql/SparqlTemplatingTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/sparql/SparqlTemplatingTest.scala index b222af138a..4866c9a5aa 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/sparql/SparqlTemplatingTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/sparql/SparqlTemplatingTest.scala @@ -1,57 +1,59 @@ -package org.silkframework.plugins.dataset.rdf.sparql - -import org.silkframework.plugins.dataset.rdf.tasks.templating.{Row, SparqlVelocityTemplating, TaskProperties, TemplateExecutionException} +package org.silkframework.plugins.dataset.rdf.sparql + + +import org.silkframework.plugins.dataset.rdf.tasks.templating.{Row, SparqlVelocityTemplating, TaskProperties} import org.scalatest.flatspec.AnyFlatSpec 
-import org.scalatest.matchers.must.Matchers - -class SparqlTemplatingTest extends AnyFlatSpec with Matchers { - behavior of "SPARQL Templating" - - it should "render a simple Velocity template" in { - val stringTemplate = - """SELECT * WHERE { - | $row.uri("uriProp") rdfs:label $row.plainLiteral("stringProp") - |}""".stripMargin - val template = SparqlVelocityTemplating.createTemplate(stringTemplate) - for(i <- 1 to 10) { - val rendered = SparqlVelocityTemplating.renderTemplate( - template, Row(Map("uriProp" -> s"http://entity$i", "stringProp" -> s"some label $i")), TaskProperties(Map.empty, Map.empty)) - rendered mustBe - s"""SELECT * WHERE { - | rdfs:label "some label $i" - |}""".stripMargin - } - } - - it should "render templates safely as long as safe methods are used, no injection attack possible" in { - val template = executeTemplate("""$row.plainLiteral("var")""", Map("var" -> "\"Delete everything!!!\"")) - template mustBe "\"\\\"Delete everything!!!\\\"\"" - } - - it should "fail if the value for uri() is not an URI" in { - intercept[TemplateExecutionException] { - executeTemplate("""$row.uri("uri")""", Map("uri" -> "http:// broken Uri >")) - } - } - - it should "output a nice error message when there is a syntax error" in { - - } - - it should "throw exception when a non-available method or variable is used" in { - intercept[TemplateExecutionException] { - executeTemplate("""Not existing $test""", Map.empty) - } - intercept[TemplateExecutionException] { - executeTemplate("""Not existing $row.notExisting("blah")""", Map("a" -> "A")) - } - intercept[TemplateExecutionException] { - executeTemplate("""Not existing $row.uri("notExists")""", Map("a" -> "A")) - } - } - - private def executeTemplate(templateString: String, bindings: Map[String, String]): String = { - val template = SparqlVelocityTemplating.createTemplate(templateString) - SparqlVelocityTemplating.renderTemplate(template, Row(bindings), TaskProperties(Map.empty, Map.empty)) - } -} +import 
org.scalatest.matchers.must.Matchers +import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException + +class SparqlTemplatingTest extends AnyFlatSpec with Matchers { + behavior of "SPARQL Templating" + + it should "render a simple Velocity template" in { + val stringTemplate = + """SELECT * WHERE { + | $row.uri("uriProp") rdfs:label $row.plainLiteral("stringProp") + |}""".stripMargin + val template = SparqlVelocityTemplating.createTemplate(stringTemplate) + for(i <- 1 to 10) { + val rendered = SparqlVelocityTemplating.renderTemplate( + template, Row(Map("uriProp" -> s"http://entity$i", "stringProp" -> s"some label $i")), TaskProperties(Map.empty, Map.empty)) + rendered mustBe + s"""SELECT * WHERE { + | rdfs:label "some label $i" + |}""".stripMargin + } + } + + it should "render templates safely as long as safe methods are used, no injection attack possible" in { + val template = executeTemplate("""$row.plainLiteral("var")""", Map("var" -> "\"Delete everything!!!\"")) + template mustBe "\"\\\"Delete everything!!!\\\"\"" + } + + it should "fail if the value for uri() is not an URI" in { + intercept[TemplateEvaluationException] { + executeTemplate("""$row.uri("uri")""", Map("uri" -> "http:// broken Uri >")) + } + } + + it should "output a nice error message when there is a syntax error" in { + + } + + it should "throw exception when a non-available method or variable is used" in { + intercept[TemplateEvaluationException] { + executeTemplate("""Not existing $test""", Map.empty) + } + intercept[TemplateEvaluationException] { + executeTemplate("""Not existing $row.notExisting("blah")""", Map("a" -> "A")) + } + intercept[TemplateEvaluationException] { + executeTemplate("""Not existing $row.uri("notExists")""", Map("a" -> "A")) + } + } + + private def executeTemplate(templateString: String, bindings: Map[String, String]): String = { + val template = SparqlVelocityTemplating.createTemplate(templateString) + 
SparqlVelocityTemplating.renderTemplate(template, Row(bindings), TaskProperties(Map.empty, Map.empty)) + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala index e7507637ba..b3cbc85dbc 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala @@ -5,6 +5,7 @@ import org.silkframework.plugins.templating.jinja.JinjaTemplateEngine import org.silkframework.runtime.validation.ValidationException import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers +import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { @@ -37,6 +38,10 @@ class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { validate(templateWithLogic) } + it should "always validate templates as correct if rawUnsafe() is used, because there is no way to generate meaningful examples to validate" in { + validate("""Completely broken SPARQL Update query with {{ row.rawUnsafe("something") }}""") + } + it should "raise a validation error when the template is invalid" in { intercept[ValidationException] { validate("""DELETE DATA { unknownPrefix:label "test" } ;""") @@ -58,6 +63,61 @@ class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { } } + it should "render uri() values as SPARQL URI syntax" in { + val result = generate( + """INSERT DATA { {{ row.uri("subject") }} "value" } ;""", + Map("subject" -> "urn:entity:1") + ) + result must include("") + } + + it should "render plainLiteral() values as escaped SPARQL literals" in { + val result 
= generate( + """INSERT DATA { {{ row.plainLiteral("label") }} } ;""", + Map("label" -> """hello "world"""") + ) + result must include(""""hello \"world\""""") + } + + it should "inject rawUnsafe() values verbatim without modification" in { + val rawValue = " " + val result = generate( + """INSERT DATA { {{ row.rawUnsafe("raw") }} } ;""", + Map("raw" -> rawValue) + ) + result must include(rawValue) + } + + it should "include or exclude blocks based on exists()" in { + val template = + """INSERT DATA { {% if row.exists("x") %} "found" .{% endif %} } ;""" + generate(template, Map("x" -> "urn:entity:1")) must include("found") + generate(template, Map.empty) must not include "found" + } + + it should "render inputProperties and outputProperties via TaskProperties" in { + val result = generate( + """INSERT DATA { {{ inputProperties.uri("x") }} {{ outputProperties.uri("y") }} } ;""", + assignments = Map.empty, + taskProps = TaskProperties(Map("x" -> "urn:input:1"), Map("y" -> "urn:output:1")) + ) + result must include("") + result must include("") + } + + it should "throw a TemplateExecutionException when uri() receives a non-URI value" in { + intercept[TemplateEvaluationException] { + generate( + """INSERT DATA { {{ row.uri("subject") }} "value" } ;""", + Map("subject" -> "not a uri") + ) + } + } + + def generate(template: String, assignments: Map[String, String], taskProps: TaskProperties = TaskProperties(Map.empty, Map.empty)): String = { + new SparqlTemplate(JinjaTemplateEngine().compile(template)).generate(assignments, taskProps) + } + def validate(template: String, batchSize: Int = 2): Unit = { new SparqlTemplate(JinjaTemplateEngine().compile(template)).validate(batchSize) } From 189db8b07f8351fb8a0c54d8a776ccf05375fc69 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 13:27:59 +0100 Subject: [PATCH 15/63] Add Jinja tests to SparqlUpdateTaskIntegrationTest --- .../dataset/rdf/sparqlUpdateProject.zip | Bin 8850 -> 11241 bytes 
.../rdf/SparqlUpdateTaskIntegrationTest.scala | 6 +++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/silk-plugins/silk-plugins-rdf/src/test/resources/org/silkframework/plugins/dataset/rdf/sparqlUpdateProject.zip b/silk-plugins/silk-plugins-rdf/src/test/resources/org/silkframework/plugins/dataset/rdf/sparqlUpdateProject.zip index b74b7570d34a82ac44954c8ff60fc65072ad5513..af082b443de35e08fc06cdc1b675ad037c4177c4 100644 GIT binary patch delta 4232 zcmZ`+c_5VQ_n#Tt*crnx24f#2MD{FMvt$>ND@(SqWOu2_a#NxkFUeN9LbfcEExS-s z$`YZhSyE&tOMPDo&Aq?h{59`$KIe1J^PKaXbDjjPPiHud^~uPQ5C{YgaqtMTH4(;; zKL)?fKp_xrKnjkEKi0_hJ3wX>8%zcY2cdvH0~#2`&=Xamj8GCxmV4<2xWLuu+3*7 za%ESwxIz?lbTfl|)liYAx}6}T$a<$_{a7b%E57!Q zRY;XiopJg%39@I6SJA^cZwb&h9d+=l#$RH3E(vvkm@8@Cc>+R$0>+k05 zw|mYRW?iS7C6Sk=#byzRFeafzw@yKLi>uo8mndpR=bFaWq5{uo+(1L9TqE>4+A~$= zv%-n>we(KU+(CiT*Q>No7hZHH6!g!Wy86D6D~95Ae7K$fF7m$kg<(NVH#dAj$mI6) zTGIjpLGFD)$(&DU7%YBTR@S3>1j>a=q-0T_`F^JTlQ8dT=99&4auS_i{8H;Zm0Amj zDt5le*5iiE9uq$|arv}53I&JtmN1sax|Y8ay@@#qG&n4Ow(-JP^`)G6=o66CXmu9Ly!K9dAyAr7E z4$)$yKX3BN#*3l%M}Fc1?VOz0XB4pM#?oAqRn~OKI8|I~# zcF-umBVc?zZH_V3x*sL)P|G!iUE^uU>DoTsgdf+G%6hg?@RE9wU(|mtGi_*b zRYOyj>6^e=zW%Bt>W*?a8&+02T)@~S-zT~%6?fG&BcX4myY!EbWu_GI2Dx73XDg#~ zT;8|IOE_-J&-QLfM1ClbaTucvka<-WJh_7}S~?Sz@$DG-Q}K6>s!bx@n==G~xwn4SO(= z%$XS0!P;+!YUDO5S5_2#3t=?Jnz()}C8!4lDY!Ihr1M7E5r<2}R*8BA^Kv$)p9T)j z<$DepdCjFI1f&4PN>v)7Zki5gF57p}x{_w%WifXgZ#6PSkyH3mPq7M5cok0swP~ft zcjtX9wy?_V&UX?iu$9&Bugv;hXJm>98DaOfy52(nSJm~{422fAqfCm;Ez_PNZ3NT= zXD{V>K@Yd8x2o&Qnm073iSkUe?8YI2c~-W~vHCN`Slw>5AI=er-rRKjy(z;~3KB?O z24NQI2E*R1>qdDDZ2VzLXuoA)inqkf902 zftMTt5dj;7A8;Jb33zbR6LYAwNlSa?H7cEU5W=@x-ocRZ0HRdGB*p)V8KaR7DCOQI z+eHv%Xe~)eyra_#^+AXn2sye7A$SM)`vmy!bqEc=z}@1Nov7{sX27$rTS)6pzqqnU z-6K_sj2h}CH3rJHneAv`Wa*!x7(7~_B*lVczwh1LKK4%Y|ZXdaAd9u5|vETka-*Iz08FsR1 zdiQ(2FN}TjKeo8zwi9GsyD;%N+n3W`xqQRd4238pPyA3*jq}}bpnY@a(q`D0WK0(H zZk=V124uYE;?ty-OiKdgy-eE}v?twTDmN+1`)5V}sR-&-Ott!*_$zdc1C#=sW}fxbc!)(#ue~k5d@5wE|8{6)bm(*(2+^1{aJy4 
z#Sod96=AHuj}-Pp^>ij6i5Ex`OG41A~$t zYbLE!^%G$N_X>3#E4KpuXockXip@Ja{&bd~?MqIdvhH+^!7deNo~b-eh^q9g31_<8 z@@Syj>WRkdU<)aABw*@T_a^h3xqmJ-%y7kA8G6KT~#hK)*G} zKzDSZZgy(ELvZNgVvnnS&XuQ8qYl~0>S^KezMgC9qw8z+70Im3BdyY6@3dFpEJ|eE zbA&vcNnyue%&~;>@hFpr7YSi>$8X*rNF8Z^s)dxX|D-6c71~1SKt7l9l8io_PrlR1 zU&m1T=I8F^`wb^KW&@3L&4h)#uhO$<<```L^X3TI4|V3QF}pOCw)nnhxb95=CHL0# z!E5)%Ub>9C72hcDwhyi^G}PbLk%UBwrlj(R<`2`QSZVmpEhD<;>&SS78^#902So!{ z|Hkc5kS5klT&!pe7^?v=R?5IZto}O){k0c4_N!laSRbtGiO{`)$jxg{9Lr*73JISi@tqwact+lFO=0Vz% zoV9307cu(pkpbVL8r`gj3{>XQJ6R4>_AQU>X{%5Gszzv$^k?|xpe}Z6W z)b$L?PiqYpb)dZ?ljgIANj#>p{%{8NqB!I}s!$p+V|`Nm;R6N&5d$mA;ZGUx!5qW=&Q0==IRt|L zqRD`Z7?I-#Z<>EwAsN(3G$9nbniPAQ;H-l_^!*WrjdB-SOSy{xw#n#@{F|0!Vj`hc zfd5j0lafktqhV{*po|SD;evxro%PV*WiB)?x%2avgw4Iui-_F6S>W^Z%cZB(;)6+nsA) w`~S+xM$r4yJo^ZMj1PIZGaf!PjF={*dgqY=OBuff7j0c0M(*H)&Kwi delta 2252 zcmai#2~ZPP7{`-MNR}lK5&|&`gb*?i1Oz!0HHbl~$RUTwp|qk1a!AC+7ODsVtyL=u zJ{&|q5Cl}}AdMVKDIQ2wYSqDxRSxS3s89-4D^p9~rVzp~;LGe}_xrxz{@?e#_r2{4 zJ^YDrfDeVrA)zQr;$`yvNg9I>SQr=hSztMmI%_%_Xk|h@58a_;5@~wKMg_?bU8Oyl zNrss7?}8KxL>_(|Q_2uyvI!k6i?10$VEEHrB&Q&uKe$N?PB(~nWl5DBRp8-n zUaq-aWUGO?T00aUZH&&lmAUUkUd8a@qOUfsnHR>hY0oZ5&iqr?W&f)~&%&>DUeOUb zdNh`|$P7&qn3!{YP`e`Q@{oDGP5k;O&wST(QA!k{1AXdpg?j2(r6(MNJ zxIYl#@~~;oeVs+eAN8ld?{BL}zPmH}EMhIY;Pp7e?ekl!gefCGC84LiJ6GPY^h>Su z)C<5Fy)F%NeyP~C-u<+H2z; z+e=@^AT`zJgQW)>j)|AW79GCV`>@9S*~K6&*6kP>HNPXOT`ntgsMk&_+V=88-sKl( zOT#Woi+q*c%|q+!Lf+eC-n3V(jH+H9uwJ`HIO(4PUoO(nU9u%K(f+sPaku*x)c1T=55%{kiqC{*KCUgN9@%mipr`k2+MA-- zUX;1?hL%@WiEZh9L@%F7D?8@XCa0Ig<;A}=y#D2doeRIdF0U$ZYyKk>6Jc3}yltbo za+;)t=h2()68}YGM5FEegtVr+=J8PxcCo}$SqdwTeXSJTH-@fkInIt{iLSNs}?@uueii1pTm;Jrf@kOgYw!PhI zqrCk&aBSAxJt@@I%8 zk>(QfR@$>-2_!x6K|vryq%$J_H~ef426V)_%5Js^(ZL*6TnrA;E_h9@63p4vjslHz zlsIZ&rm8VHXw0bkb5tEDNt`5@aDoG#9{{bf@S2VQ#S~nrZy16QN$*Y@Dz=I0yGb78(0F3K?YJiBLEL~U{A)tISYu4eK)`*3ml0+#{68{ z03R~qPT{Bz?)XE04cbC=3%D9i>Ng4$Co}3PDrkUaHVIOnUJx39kaQB{tThH1gFPr) zpHQ2W4`zuYgESb!hfQcO)sI`@h!Lvt2U~H}scSZrwOWw4S*NWEcEEI1v(yyR?5xSq 
z2nHb$sI+8G)F}xnA7ua*%Lz1FY1HpFw0r4+8n)dO0YZPgPsGH7dNeX2AOIW1>*lf-=3h>dQ5q<;s L>x6eD#>CTqIF35+ diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala index 609e07ae1a..ad9863208e 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala @@ -23,7 +23,11 @@ class SparqlUpdateTaskIntegrationTest extends AnyFlatSpec with Matchers with Sin // Uses Velocity templating mode ("Velocity", "workflowVelocity", "outputVelocity.csv", identity), // Uses Velocity templating mode and accessed input and output task properties - ("Velocity with task properties", "workflowVelocityTaskProperties", "outputVelocity.csv", taskPropertyConcatenate) // + ("Velocity with task properties", "workflowVelocityTaskProperties", "outputVelocity.csv", taskPropertyConcatenate), + // Uses Jinja templating mode + ("Jinja", "workflowJinja", "outputJinja.csv", identity), + // Uses Jinja templating mode and accessed input and output task properties + ("Jinja with task properties", "workflowJinjaTaskProperties", "outputJinja.csv", taskPropertyConcatenate) )) { it should s"generate the correct result in '$templatingMode' templating mode" in { executeWorkflow(workflowId) From f241ac244ed780aa57200a68ba299bdacbfb5143 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 13:36:06 +0100 Subject: [PATCH 16/63] ErrorResult bugfix: Match was not exhausted --- .../app/org/silkframework/workbench/utils/ErrorResult.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala 
b/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala index 8d1a680bf8..e333839cf7 100644 --- a/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala +++ b/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala @@ -70,6 +70,8 @@ object ErrorResult { JsObject( requestEx.additionalData.map(data => data._1 -> JsString(data._2.toString)).toSeq ) + case _ => + Json.obj() } } From 8bdf371ade1fc279015a640b08747163f896b9d6 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 15:19:38 +0100 Subject: [PATCH 17/63] Move Velocity Template engine to its own module. --- build.sbt | 17 +- .../plugins/dataset/rdf/RdfPlugins.scala | 5 +- .../rdf/tasks/templating/SparqlTemplate.scala | 58 ++++++- .../SparqlUpdateTemplatingEngineSimple.scala | 6 +- .../templating/SparqlVelocityTemplating.scala | 152 ------------------ .../templating/TemplateValueAccessApi.scala | 59 +++++++ .../rdf/LocalSparqlUpdateExecutorTest.scala | 3 +- .../rdf/sparql/SparqlTemplatingTest.scala | 59 ------- .../SparqlTemplateVelocityTest.scala | 45 +++++- ....silkframework.runtime.plugin.PluginModule | 1 + .../velocity}/VelocityTemplateEngine.scala | 32 +++- .../velocity/VelocityTemplatingPlugins.scala | 7 + 12 files changed, 207 insertions(+), 237 deletions(-) delete mode 100644 silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlVelocityTemplating.scala create mode 100644 silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/TemplateValueAccessApi.scala delete mode 100644 silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/sparql/SparqlTemplatingTest.scala create mode 100644 silk-plugins/silk-plugins-templating-velocity/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule rename 
silk-plugins/{silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating => silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity}/VelocityTemplateEngine.scala (77%) create mode 100644 silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplatingPlugins.scala diff --git a/build.sbt b/build.sbt index e9b5914e27..acbe266ea7 100644 --- a/build.sbt +++ b/build.sbt @@ -155,13 +155,20 @@ lazy val pluginsTemplatingJinja = (project in file("silk-plugins/silk-plugins-te libraryDependencies += "com.hubspot.jinjava" % "jinjava" % "2.8.3" ) +lazy val pluginsTemplatingVelocity = (project in file("silk-plugins/silk-plugins-templating-velocity")) + .dependsOn(rules % "compile->compile;test->test") + .settings(commonSettings *) + .settings( + name := "Silk Plugins Templating Velocity", + libraryDependencies += "org.apache.velocity" % "velocity-engine-core" % "2.4.1" + ) + lazy val pluginsRdf = (project in file("silk-plugins/silk-plugins-rdf")) - .dependsOn(rules, workspace % "test->test;compile->compile", core % "test->test;compile->compile", pluginsCsv % "test->compile", pluginsTemplatingJinja % "test->compile") + .dependsOn(rules, workspace % "test->test;compile->compile", core % "test->test;compile->compile", pluginsCsv % "test->compile", pluginsTemplatingJinja % "test->compile", pluginsTemplatingVelocity % "test->compile") .settings(commonSettings *) .settings( name := "Silk Plugins RDF", - libraryDependencies += "org.apache.jena" % "jena-fuseki-main" % "5.6.0" % "test", - libraryDependencies += "org.apache.velocity" % "velocity-engine-core" % "2.4.1" + libraryDependencies += "org.apache.jena" % "jena-fuseki-main" % "5.6.0" % "test" ) lazy val pluginsCsv = (project in file("silk-plugins/silk-plugins-csv")) @@ -230,8 +237,8 @@ lazy val persistentCaching = (project in file("silk-plugins/silk-persistent-cach // Aggregate all plugins lazy val plugins 
= (project in file("silk-plugins")) - .dependsOn(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, serializationJson, persistentCaching, pluginsTemplatingJinja) - .aggregate(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, serializationJson, persistentCaching, pluginsTemplatingJinja) + .dependsOn(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, serializationJson, persistentCaching, pluginsTemplatingJinja, pluginsTemplatingVelocity) + .aggregate(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, serializationJson, persistentCaching, pluginsTemplatingJinja, pluginsTemplatingVelocity) .settings(commonSettings *) .settings( name := "Silk Plugins" diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala index c94e3682e4..b2da803653 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala @@ -3,7 +3,7 @@ package org.silkframework.plugins.dataset.rdf import org.silkframework.plugins.dataset.rdf.datasets.{AlignmentDataset, InMemoryDataset, RdfFileDataset, SparqlDataset} import org.silkframework.plugins.dataset.rdf.executors.{LocalSparqlCopyExecutor, LocalSparqlSelectExecutor, LocalSparqlUpdateExecutor} import org.silkframework.plugins.dataset.rdf.tasks.{SparqlCopyCustomTask, SparqlSelectCustomTask, SparqlUpdateCustomTask} -import org.silkframework.plugins.dataset.rdf.tasks.templating.{SparqlSimpleTemplateEngine, VelocityTemplateEngine} +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlSimpleTemplateEngine import org.silkframework.plugins.dataset.rdf.vocab.{InMemoryVocabularyManager, RdfFilesVocabularyManager, RdfProjectFilesVocabularyManager, RdfVocabularyManager} import 
org.silkframework.runtime.plugin.{AnyPlugin, PluginModule} @@ -22,8 +22,7 @@ class RdfPlugins extends PluginModule { classOf[SparqlSelectCustomTask], classOf[SparqlCopyCustomTask], classOf[SparqlUpdateCustomTask], - classOf[SparqlSimpleTemplateEngine], - classOf[VelocityTemplateEngine] + classOf[SparqlSimpleTemplateEngine] ) ++ executors val executors = Seq( diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index ee8ac4aa99..85987dcac6 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -4,6 +4,7 @@ import org.apache.jena.update.UpdateFactory import org.silkframework.entity.EntitySchema import org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlTemplate.{InputProperties, OutputProperties, Row} import org.silkframework.runtime.templating.{CompiledTemplate, TemplateMethodUsage, TemplateVariableName} import org.silkframework.runtime.validation.ValidationException @@ -21,9 +22,9 @@ class SparqlTemplate(template: CompiledTemplate) { // Flat entity values (used by simple template engine) placeholderAssignments.foreach { case (k, v) => values(k) = v } // SPARQL context objects (used by Velocity engine) - values(SparqlVelocityTemplating.ROW_VAR_NAME) = Row(placeholderAssignments) - values(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME) = InputProperties(taskProperties.inputTask) - values(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME) = OutputProperties(taskProperties.outputTask) + values(SparqlTemplate.ROW_VAR_NAME) = 
Row(placeholderAssignments) + values(SparqlTemplate.INPUT_PROPERTIES_VAR_NAME) = InputProperties(taskProperties.inputTask) + values(SparqlTemplate.OUTPUT_PROPERTIES_VAR_NAME) = OutputProperties(taskProperties.outputTask) val writer = new StringWriter() template.evaluate(values.toMap, writer) writer.toString @@ -86,13 +87,13 @@ class SparqlTemplate(template: CompiledTemplate) { /** Returns SPARQL-specific variables, extracting paths from method usages. */ private lazy val sparqlVariables: Option[Seq[TemplateVariableName]] = { - val usages = SparqlVelocityTemplating.templatingVariables.flatMap(v => template.methodUsages(v)) + val usages = SparqlTemplate.templatingVariables.flatMap(v => template.methodUsages(v)) if (usages.nonEmpty) { - val rowVars = sparqlMethodUsages(SparqlVelocityTemplating.ROW_VAR_NAME) + val rowVars = sparqlMethodUsages(SparqlTemplate.ROW_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue, "")) - val inputPropVars = sparqlMethodUsages(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME) + val inputPropVars = sparqlMethodUsages(SparqlTemplate.INPUT_PROPERTIES_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue, "inputProperties")) - val outputPropVars = sparqlMethodUsages(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME) + val outputPropVars = sparqlMethodUsages(SparqlTemplate.OUTPUT_PROPERTIES_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue, "outputProperties")) Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) } else { @@ -107,7 +108,7 @@ class SparqlTemplate(template: CompiledTemplate) { /** Checks if any SPARQL templating variable uses the rawUnsafe method. 
*/ private lazy val usesRawUnsafe: Boolean = { - SparqlVelocityTemplating.templatingVariables.exists(varName => + SparqlTemplate.templatingVariables.exists(varName => sparqlMethodUsages(varName).exists(_.methodName == "rawUnsafe")) } @@ -132,5 +133,46 @@ class SparqlTemplate(template: CompiledTemplate) { } } +object SparqlTemplate { + + final val ROW_VAR_NAME = "row" + final val INPUT_PROPERTIES_VAR_NAME = "inputProperties" + final val OUTPUT_PROPERTIES_VAR_NAME = "outputProperties" + + final val templatingVariables = Seq(ROW_VAR_NAME, INPUT_PROPERTIES_VAR_NAME, OUTPUT_PROPERTIES_VAR_NAME) + + /** Row API used in SPARQL templates. Represents a single row where input paths are either exactly one value or empty. + * + * The Row object will be available in Velocity templates as 'row' variable. + * + * Examples: + * + *
+   *   $row.uri("urn:prop:uriProp") ## Renders the value of the input path as URI, e.g. 
+   *   $row.plainLiteral("urn:prop:stringProp") ## Renders the value of the input paths as plain string, e.g. "Quotes \" are escaped"
+   *   $row.rawUnsafe("urn:prop:trustedValuesOnly") ## Puts the value as it is into the rendered template. This is UNSAFE and prone to injection attacks.
+   *   #if ( $row.exists("urn:prop:valueMightNotExist") ) ## Checks if a value exists for the input path, i.e. values can always be optional.
+   *     $row.plainLiteral("urn:prop:valueMightNotExist") ## If no value exists for the input path then this would throw an exception
+   *   #end
+   * 
+ * + * @param inputValues The map of existing input values, i.e. values that were defined by input paths, but where no value was available are not set. + */ + case class Row(inputValues: Map[String, String]) extends TemplateValueAccessApi { + override val templateVarName: String = ROW_VAR_NAME + } + + /** Similar to Row, but for the input task properties. */ + case class InputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { + override val templateVarName: String = INPUT_PROPERTIES_VAR_NAME + } + + /** Similar to Row, but for the output task properties. */ + case class OutputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { + override val templateVarName: String = OUTPUT_PROPERTIES_VAR_NAME + } + +} + /** Makes properties of the input and output task of a SPARQL Update operator execution available. */ case class TaskProperties(inputTask: Map[String, String], outputTask: Map[String, String]) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala index 510b864b10..d2ee79116c 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala @@ -12,12 +12,12 @@ import scala.collection.mutable.ArrayBuffer import scala.util.matching.Regex /** - * A simple SPARQL Update templating engine that supports plain literal and URI placeholders. + * A simple templating engine that supports plain literal and URI placeholders. 
*/ @Plugin( id = SparqlSimpleTemplateEngine.id, - label = "Simple SPARQL", - description = "A simple SPARQL Update templating engine that supports plain literal and URI placeholders." + label = "Simple (deprecated)", + description = "A simple templating engine that supports plain literal and URI placeholders." ) case class SparqlSimpleTemplateEngine() extends TemplateEngine { diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlVelocityTemplating.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlVelocityTemplating.scala deleted file mode 100644 index 470e052dcf..0000000000 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlVelocityTemplating.scala +++ /dev/null @@ -1,152 +0,0 @@ -package org.silkframework.plugins.dataset.rdf.tasks.templating - -import java.io.{StringReader, StringWriter} -import java.net.URI - -import org.apache.jena.graph.NodeFactory -import org.apache.velocity.context.Context -import org.apache.velocity.exception.MethodInvocationException -import org.apache.velocity.runtime.RuntimeSingleton -import org.apache.velocity.{Template, VelocityContext} -import org.silkframework.rule.util.JenaSerializationUtil -import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException - -import scala.util.Try - -/** - * Templating engine für SPARQL queries. - * Based on the Apache Velocity engine. - * - * @see See [[https://velocity.apache.org/]] for more information. - */ -object SparqlVelocityTemplating { - final val ROW_VAR_NAME = "row" - final val INPUT_PROPERTIES_VAR_NAME = "inputProperties" - final val OUTPUT_PROPERTIES_VAR_NAME = "outputProperties" - - final val templatingVariables = Seq(ROW_VAR_NAME, INPUT_PROPERTIES_VAR_NAME, OUTPUT_PROPERTIES_VAR_NAME) - - /** Creates a Velocity template based on the given template string. 
*/ - def createTemplate(sparqlTemplate: String): Template = { - val service = RuntimeSingleton.getRuntimeServices - service.addProperty("runtime.strict_mode.enable", true) // This should fail if it cannot replace variables with input values. - val reader = new StringReader(sparqlTemplate) - val template = new Template() - template.setRuntimeServices(service) - template.setData(service.parse(reader, template)) - template.initDocument() - template - } - - /** Renders the template with the given context */ - def renderTemplate(template: Template, context: Context): String = { - val writer = new StringWriter() - template.merge(context, writer) - writer.toString - } - - def renderTemplate(template: Template, row: Row, taskProperties: TaskProperties): String = { - try { - val context = new VelocityContext() - context.put(ROW_VAR_NAME, row) - context.put(INPUT_PROPERTIES_VAR_NAME, InputProperties(taskProperties.inputTask)) - context.put(OUTPUT_PROPERTIES_VAR_NAME, OutputProperties(taskProperties.outputTask)) - renderTemplate(template, context) - } catch { - case ex: MethodInvocationException => - val adaptedMessage = prettifyExceptionMessage(Option(ex.getMessage).getOrElse("")) - throw new TemplateEvaluationException("Template could not be rendered. Error detail: " + adaptedMessage, Some(ex)) - } - } - - private def prettifyExceptionMessage(errorMessage: String): String = { - var replacement = errorMessage. - replace("java.lang.String", "String"). - replace("", ""). - replace("threw exception org.silkframework.plugins.dataset.rdf.tasks.templating.TemplateExecutionException", "has failed with error message") - for((className, varName) <- Seq(("Row", ROW_VAR_NAME), ("InputProperties", INPUT_PROPERTIES_VAR_NAME), ("OutputProperties", OUTPUT_PROPERTIES_VAR_NAME))) { - replacement = replacement.replace(s"Object 'org.silkframework.plugins.dataset.rdf.sparql.$className'", varName). 
- replace(s"in class org.silkframework.plugins.dataset.rdf.tasks.templating.$className" , s"of $$$varName object") - } - replacement - } -} - -/** Row API used in SPARQL templates. Represents a single row where input paths are either exactly one value or empty. - * - * The Row object will be available in Velocity templates as 'row' variable. - * - * Examples: - * - *
-  *   $row.uri("urn:prop:uriProp") ## Renders the value of the input path as URI, e.g. 
-  *   $row.plainLiteral("urn:prop:stringProp") ## Renders the value of the input paths as plain string, e.g. "Quotes \" are escaped"
-  *   $row.rawUnsafe("urn:prop:trustedValuesOnly") ## Puts the value as it is into the rendered template. This is UNSAFE and prone to injection attacks.
-  *   #if ( $row.exists("urn:prop:valueMightNotExist") ) ## Checks if a value exists for the input path, i.e. values can always be optional.
-  *     $row.plainLiteral("urn:prop:valueMightNotExist") ## If no value exists for the input path then this would throw an exception
-  *   #end
-  * 
- * - * @param inputValues The map of existing input values, i.e. values that were defined by input paths, but where no value was available are not set. - */ -case class Row(inputValues: Map[String, String]) extends TemplateValueAccessApi { - override val templateVarName: String = SparqlVelocityTemplating.ROW_VAR_NAME -} - -/** Similar to Row, but for the input task properties. */ -case class InputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { - override val templateVarName: String = SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME -} - -/** Similar to Row, but for the output task properties. */ -case class OutputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { - override val templateVarName: String = SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME -} - -/** API used in templates to access all kinds of input values. Represents a key value object where input values are either exactly one value or empty/not defined. - * - * See [[Row]] for examples. - * - */ -trait TemplateValueAccessApi { - def inputValues: Map[String, String] - - def templateVarName: String - - /** Returns the value for a specific input path as URI, i.e. 
<...> */ - def uri(inputPath: String): String = { - val value = objectValue(inputPath) - if(Try(new URI(value)).isFailure) { - throw new TemplateEvaluationException(s"Value for input path '$inputPath' is not a valid URI: '$value'") - } - val uriNode = NodeFactory.createURI(value) - JenaSerializationUtil.serializeSingleNode(uriNode) - } - - /** Checks if a value for the provided input path exists */ - def exists(inputPath: String): Boolean = { - inputValues.contains(inputPath) - } - - private def objectValue(inputPath: String): String = { - inputValues.get(inputPath) match { - case Some(value) => - value - case None => - throw new TemplateEvaluationException(s"Input path '$inputPath' did not exist in $$$templateVarName.") - } - } - - /** Returns the value for a specific input path as SPARQL plain literal, i.e. "..." */ - def plainLiteral(inputPath: String): String = { - val value = objectValue(inputPath) - val uriNode = NodeFactory.createLiteral(value) - JenaSerializationUtil.serializeSingleNode(uriNode) - } - - /** Puts the value of the input path as raw string into the rendered template. - * This can be UNSAFE and should never be used when the input data comes from untrusted sources. 
*/ - def rawUnsafe(inputPath: String): String = { - objectValue(inputPath) - } -} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/TemplateValueAccessApi.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/TemplateValueAccessApi.scala new file mode 100644 index 0000000000..7a4886f2ba --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/TemplateValueAccessApi.scala @@ -0,0 +1,59 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import java.net.URI + +import org.apache.jena.graph.NodeFactory +import org.silkframework.rule.util.JenaSerializationUtil +import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException + +import scala.util.Try + + +/** + * API used in templates to access all kinds of input values. Represents a key value object where input values are either exactly one value or empty/not defined. + * + * See [[Row]] for examples. + * + */ +trait TemplateValueAccessApi { + def inputValues: Map[String, String] + + def templateVarName: String + + /** Returns the value for a specific input path as URI, i.e. 
<...> */ + def uri(inputPath: String): String = { + val value = objectValue(inputPath) + if(Try(new URI(value)).isFailure) { + throw new TemplateEvaluationException(s"Value for input path '$inputPath' is not a valid URI: '$value'") + } + val uriNode = NodeFactory.createURI(value) + JenaSerializationUtil.serializeSingleNode(uriNode) + } + + /** Checks if a value for the provided input path exists */ + def exists(inputPath: String): Boolean = { + inputValues.contains(inputPath) + } + + private def objectValue(inputPath: String): String = { + inputValues.get(inputPath) match { + case Some(value) => + value + case None => + throw new TemplateEvaluationException(s"Input path '$inputPath' did not exist in $$$templateVarName.") + } + } + + /** Returns the value for a specific input path as SPARQL plain literal, i.e. "..." */ + def plainLiteral(inputPath: String): String = { + val value = objectValue(inputPath) + val uriNode = NodeFactory.createLiteral(value) + JenaSerializationUtil.serializeSingleNode(uriNode) + } + + /** Puts the value of the input path as raw string into the rendered template. + * This can be UNSAFE and should never be used when the input data comes from untrusted sources. 
*/ + def rawUnsafe(inputPath: String): String = { + objectValue(inputPath) + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala index c3eb4c902f..81c192b838 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala @@ -8,7 +8,7 @@ import org.silkframework.execution.{ExecutionReport, ExecutorOutput} import org.silkframework.execution.local.{GenericEntityTable, LocalEntities, LocalExecution} import org.silkframework.plugins.dataset.rdf.executors.LocalSparqlUpdateExecutor import org.silkframework.plugins.dataset.rdf.tasks.SparqlUpdateCustomTask -import org.silkframework.plugins.dataset.rdf.tasks.templating.{SparqlSimpleTemplateEngine, VelocityTemplateEngine} +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlSimpleTemplateEngine import org.silkframework.runtime.activity.{ActivityContext, UserContext} import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, TestPluginContext} import org.silkframework.runtime.validation.ValidationException @@ -17,6 +17,7 @@ import org.silkframework.workspace.TestWorkspaceProviderTestTrait import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers import org.silkframework.execution.typed.SparqlUpdateEntitySchema +import org.silkframework.plugins.templating.velocity.VelocityTemplateEngine class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestWorkspaceProviderTestTrait { behavior of "Local SPARQL Update Executor" diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/sparql/SparqlTemplatingTest.scala 
b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/sparql/SparqlTemplatingTest.scala deleted file mode 100644 index 4866c9a5aa..0000000000 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/sparql/SparqlTemplatingTest.scala +++ /dev/null @@ -1,59 +0,0 @@ -package org.silkframework.plugins.dataset.rdf.sparql - - -import org.silkframework.plugins.dataset.rdf.tasks.templating.{Row, SparqlVelocityTemplating, TaskProperties} -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.must.Matchers -import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException - -class SparqlTemplatingTest extends AnyFlatSpec with Matchers { - behavior of "SPARQL Templating" - - it should "render a simple Velocity template" in { - val stringTemplate = - """SELECT * WHERE { - | $row.uri("uriProp") rdfs:label $row.plainLiteral("stringProp") - |}""".stripMargin - val template = SparqlVelocityTemplating.createTemplate(stringTemplate) - for(i <- 1 to 10) { - val rendered = SparqlVelocityTemplating.renderTemplate( - template, Row(Map("uriProp" -> s"http://entity$i", "stringProp" -> s"some label $i")), TaskProperties(Map.empty, Map.empty)) - rendered mustBe - s"""SELECT * WHERE { - | rdfs:label "some label $i" - |}""".stripMargin - } - } - - it should "render templates safely as long as safe methods are used, no injection attack possible" in { - val template = executeTemplate("""$row.plainLiteral("var")""", Map("var" -> "\"Delete everything!!!\"")) - template mustBe "\"\\\"Delete everything!!!\\\"\"" - } - - it should "fail if the value for uri() is not an URI" in { - intercept[TemplateEvaluationException] { - executeTemplate("""$row.uri("uri")""", Map("uri" -> "http:// broken Uri >")) - } - } - - it should "output a nice error message when there is a syntax error" in { - - } - - it should "throw exception when a non-available method or variable is used" in { - 
intercept[TemplateEvaluationException] { - executeTemplate("""Not existing $test""", Map.empty) - } - intercept[TemplateEvaluationException] { - executeTemplate("""Not existing $row.notExisting("blah")""", Map("a" -> "A")) - } - intercept[TemplateEvaluationException] { - executeTemplate("""Not existing $row.uri("notExists")""", Map("a" -> "A")) - } - } - - private def executeTemplate(templateString: String, bindings: Map[String, String]): String = { - val template = SparqlVelocityTemplating.createTemplate(templateString) - SparqlVelocityTemplating.renderTemplate(template, Row(bindings), TaskProperties(Map.empty, Map.empty)) - } -} diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala index 0ee6f2f80e..874844b37b 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala @@ -1,10 +1,11 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating import org.apache.jena.vocabulary.XSD - +import org.silkframework.plugins.templating.velocity.VelocityTemplateEngine import org.silkframework.runtime.validation.ValidationException import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers +import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { @@ -73,6 +74,48 @@ class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { } } + it should "render a simple Velocity template" in { + val stringTemplate = + """SELECT * WHERE { + | $row.uri("uriProp") rdfs:label $row.plainLiteral("stringProp") + 
|}""".stripMargin + val template = new SparqlTemplate(VelocityTemplateEngine().compile(stringTemplate)) + for(i <- 1 to 10) { + val rendered = template.generate(Map("uriProp" -> s"http://entity$i", "stringProp" -> s"some label $i"), TaskProperties(Map.empty, Map.empty)) + rendered mustBe + s"""SELECT * WHERE { + | rdfs:label "some label $i" + |}""".stripMargin + } + } + + it should "render templates safely as long as safe methods are used, no injection attack possible" in { + val template = generate("""$row.plainLiteral("var")""", Map("var" -> "\"Delete everything!!!\"")) + template mustBe "\"\\\"Delete everything!!!\\\"\"" + } + + it should "fail if the value for uri() is not an URI" in { + intercept[TemplateEvaluationException] { + generate("""$row.uri("uri")""", Map("uri" -> "http:// broken Uri >")) + } + } + + it should "throw exception when a non-available method or variable is used" in { + intercept[TemplateEvaluationException] { + generate("""Not existing $test""", Map.empty) + } + intercept[TemplateEvaluationException] { + generate("""Not existing $row.notExisting("blah")""", Map("a" -> "A")) + } + intercept[TemplateEvaluationException] { + generate("""Not existing $row.uri("notExists")""", Map("a" -> "A")) + } + } + + private def generate(templateString: String, bindings: Map[String, String]): String = { + new SparqlTemplate(VelocityTemplateEngine().compile(templateString)).generate(bindings, TaskProperties(Map.empty, Map.empty)) + } + def validate(template: String, batchSize: Int = 2): Unit = { new SparqlTemplate(VelocityTemplateEngine().compile(template)).validate(batchSize) } diff --git a/silk-plugins/silk-plugins-templating-velocity/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule b/silk-plugins/silk-plugins-templating-velocity/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule new file mode 100644 index 0000000000..364210cf06 --- /dev/null +++ 
b/silk-plugins/silk-plugins-templating-velocity/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule @@ -0,0 +1 @@ +org.silkframework.plugins.templating.velocity.VelocityTemplatingPlugins diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala similarity index 77% rename from silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala rename to silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala index 2c47f8efd3..10800a17be 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/VelocityTemplateEngine.scala +++ b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala @@ -1,18 +1,21 @@ -package org.silkframework.plugins.dataset.rdf.tasks.templating +package org.silkframework.plugins.templating.velocity import org.apache.velocity.VelocityContext +import org.apache.velocity.context.Context import org.apache.velocity.runtime.parser.node._ +import org.apache.velocity.{Template => VelocityTemplate} +import org.apache.velocity.runtime.RuntimeSingleton import org.silkframework.runtime.plugin.annotations.Plugin import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateMethodUsage, TemplateVariableName, TemplateVariableValue} -import java.io.Writer +import java.io.{StringReader, StringWriter, Writer} /** * A general-purpose templating engine based on Apache Velocity. 
*/ @Plugin( id = VelocityTemplateEngine.id, - label = "Velocity", + label = "Velocity (deprecated)", description = "A templating engine based on Apache Velocity." ) case class VelocityTemplateEngine() extends TemplateEngine { @@ -24,6 +27,25 @@ case class VelocityTemplateEngine() extends TemplateEngine { object VelocityTemplateEngine { final val id = "velocity" + + /** Creates a Velocity template based on the given template string. */ + def createTemplate(templateString: String): VelocityTemplate = { + val service = RuntimeSingleton.getRuntimeServices + service.addProperty("runtime.strict_mode.enable", true) + val reader = new StringReader(templateString) + val template = new VelocityTemplate() + template.setRuntimeServices(service) + template.setData(service.parse(reader, template)) + template.initDocument() + template + } + + /** Renders the template with the given context. */ + def renderTemplate(template: VelocityTemplate, context: Context): String = { + val writer = new StringWriter() + template.merge(context, writer) + writer.toString + } } /** @@ -31,7 +53,7 @@ object VelocityTemplateEngine { */ class VelocityCompiledTemplate(val templateString: String) extends CompiledTemplate { - private val velocityTemplate = SparqlVelocityTemplating.createTemplate(templateString) + private val velocityTemplate = VelocityTemplateEngine.createTemplate(templateString) override lazy val variables: Option[Seq[TemplateVariableName]] = { Some(extractVariableReferences(velocityTemplate.getData.asInstanceOf[SimpleNode]) @@ -41,7 +63,7 @@ class VelocityCompiledTemplate(val templateString: String) extends CompiledTempl override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { val context = new VelocityContext() values.foreach { case (k, v) => context.put(k, v) } - writer.write(SparqlVelocityTemplating.renderTemplate(velocityTemplate, context)) + writer.write(VelocityTemplateEngine.renderTemplate(velocityTemplate, context)) } override def evaluate(values: 
Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { diff --git a/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplatingPlugins.scala b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplatingPlugins.scala new file mode 100644 index 0000000000..e23e204dab --- /dev/null +++ b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplatingPlugins.scala @@ -0,0 +1,7 @@ +package org.silkframework.plugins.templating.velocity + +import org.silkframework.runtime.plugin.{AnyPlugin, PluginModule} + +class VelocityTemplatingPlugins extends PluginModule { + override def pluginClasses: Seq[Class[_ <: AnyPlugin]] = Seq(classOf[VelocityTemplateEngine]) +} From 4581b7dfb659666e3051cd000cfc931a1d427d77 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 15:42:05 +0100 Subject: [PATCH 18/63] Template engine improvements --- .../templating/TemplateEngineAutocompletionProvider.scala | 8 ++++---- .../runtime/templating/TemplateEngines.scala | 6 +++--- .../dataset/rdf/tasks/SparqlUpdateCustomTask.scala | 5 +++-- .../templating/SparqlUpdateTemplatingEngineSimple.scala | 3 ++- .../templating/velocity/VelocityTemplateEngine.scala | 3 ++- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngineAutocompletionProvider.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngineAutocompletionProvider.scala index cdeb12e516..31859edd83 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngineAutocompletionProvider.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngineAutocompletionProvider.scala @@ -12,10 +12,10 @@ class TemplateEngineAutocompletionProvider extends 
PluginParameterAutoCompletion (implicit context: PluginContext): Iterable[AutoCompletionResult] = { val multiSearchWords = extractSearchTerms(searchQuery) TemplateEngines.availableEngines - .filter(_ != DisabledTemplateEngine.id) // Disabled template engine should not be suggested to the user - .filter(_ != UnresolvedTemplateEngine.id) // Unresolved template engine should not be suggested to the user - .filter(r => matchesSearchTerm(multiSearchWords, r.toLowerCase)) - .map(r => AutoCompletionResult(r, None)) + .filter(_.id.toString != DisabledTemplateEngine.id) // Disabled template engine should not be suggested to the user + .filter(_.id.toString != UnresolvedTemplateEngine.id) // Unresolved template engine should not be suggested to the user + .filter(engine => matchesSearchTerm(multiSearchWords, engine.id.toLowerCase)) + .map(engine => AutoCompletionResult(engine.id, Some(engine.label))) } /** Returns the label if exists for the given auto-completion value. This is needed if a value should diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala index d0fd501d44..c5fc511df0 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala @@ -1,6 +1,6 @@ package org.silkframework.runtime.templating -import org.silkframework.runtime.plugin.{PluginContext, PluginRegistry} +import org.silkframework.runtime.plugin.{PluginContext, PluginDescription, PluginRegistry} /** * Manages available template engines. @@ -10,8 +10,8 @@ object TemplateEngines { /** * Returns a list of all available template engines. 
*/ - def availableEngines: Set[String] = { - PluginRegistry.availablePlugins[TemplateEngine].map(_.id.toString).toSet + def availableEngines: Seq[PluginDescription[TemplateEngine]] = { + PluginRegistry.availablePlugins[TemplateEngine] } /** diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index 45541f964f..25ba56c9dc 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -30,9 +30,10 @@ case class SparqlUpdateCustomTask( batchSize: Int = SparqlUpdateCustomTask.defaultBatchSize, @Param( value = "The templating mode for the template engine. See the general documentation of this plugin for further details on the features of each template engine.", - autoCompletionProvider = classOf[TemplateEngineAutocompletionProvider] + autoCompletionProvider = classOf[TemplateEngineAutocompletionProvider], + autoCompleteValueWithLabels = true ) - templatingMode: String = SparqlSimpleTemplateEngine.id + templatingMode: String = "Jinja" ) extends CustomTask { assert(batchSize >= 1, "Batch size must be greater zero!") diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala index d2ee79116c..6fabb8e3cb 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala +++ 
b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala @@ -17,7 +17,8 @@ import scala.util.matching.Regex @Plugin( id = SparqlSimpleTemplateEngine.id, label = "Simple (deprecated)", - description = "A simple templating engine that supports plain literal and URI placeholders." + description = "A simple templating engine that supports plain literal and URI placeholders.", + deprecation = "This template engine is deprecated. Please use the 'Jinja' template engine instead." ) case class SparqlSimpleTemplateEngine() extends TemplateEngine { diff --git a/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala index 10800a17be..d123b955de 100644 --- a/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala +++ b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala @@ -16,7 +16,8 @@ import java.io.{StringReader, StringWriter, Writer} @Plugin( id = VelocityTemplateEngine.id, label = "Velocity (deprecated)", - description = "A templating engine based on Apache Velocity." + description = "A templating engine based on Apache Velocity.", + deprecation = "This template engine is deprecated. Please use the 'Jinja' template engine instead." 
) case class VelocityTemplateEngine() extends TemplateEngine { From f6ecbf4eae64786013beeb95a67ea5d696ed093e Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 15:56:41 +0100 Subject: [PATCH 19/63] Template engine improvements and bugfix --- .../executors/LocalSparqlUpdateExecutor.scala | 4 +-- .../rdf/tasks/SparqlSelectCustomTask.scala | 2 +- .../rdf/tasks/SparqlUpdateCustomTask.scala | 18 ++----------- .../rdf/tasks/templating/SparqlTemplate.scala | 27 ++++++++++++++----- ...arqlUpdateTemplatingEngineSimpleTest.scala | 2 +- .../velocity/VelocityTemplateEngine.scala | 12 ++++++++- 6 files changed, 38 insertions(+), 27 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala index 20a0d5c84d..0f3044a49a 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala @@ -37,7 +37,7 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT values = expectedSchema.typedPaths.map(tp => entity.valueOfPath(tp.toUntypedPath)) if values.forall(_.nonEmpty)) { val it = CrossProductIterator(values, expectedProperties) while (it.hasNext) { - val query = updateTask.generate(it.next(), taskProperties) + val query = updateTask.compiledTemplate.generate(it.next(), taskProperties) batchEmitter.update(query) } } @@ -75,7 +75,7 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT outputTask: Option[Task[_ <: TaskSpec]] = None) (implicit pluginContext: PluginContext): Unit = { val taskProperties = createTaskProperties(inputTask = inputTask, outputTask = outputTask, pluginContext = pluginContext) 
- val query = updateTask.generate(Map.empty, taskProperties) + val query = updateTask.compiledTemplate.generate(Map.empty, taskProperties) batchEmitter.update(query) } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index 42af9fcf81..47c46d7865 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -25,7 +25,7 @@ import scala.util.Try "A task that executes a SPARQL Select query on a SPARQL enabled data source and outputs the SPARQL result." + " If the SPARQL source is defined on a specific graph, a FROM clause will be added to the query at execution time," + " except when there already exists a GRAPH or FROM clause in the query. 
FROM NAMED clauses are not injected.", - documentationFile = "SparqlSelectCustomTask.md", + documentationFile = "SparqlSelectCustomTask.md", iconFile = "sparql-select-query.svg" ) case class SparqlSelectCustomTask( diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index 25ba56c9dc..f2f09cf384 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -37,26 +37,12 @@ case class SparqlUpdateCustomTask( ) extends CustomTask { assert(batchSize >= 1, "Batch size must be greater zero!") - val compiledTemplate: SparqlTemplate = { - val templateEngine = TemplateEngines.create(templatingMode) - new SparqlTemplate(templateEngine.compile(sparqlUpdateTemplate.str)) - } - - compiledTemplate.validate(batchSize) + val compiledTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, sparqlUpdateTemplate.str, batchSize) def isStaticTemplate: Boolean = compiledTemplate.isStaticTemplate def expectedInputSchema: EntitySchema = compiledTemplate.inputSchema - /** - * Generates The SPARQL Update query based on the placeholder assignments. 
- * @param placeholderAssignments For each placeholder in the query template - * @return - */ - def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { - compiledTemplate.generate(placeholderAssignments, taskProperties) - } - override def inputPorts: InputPorts = { if(isStaticTemplate) { InputPorts.NoInputPorts @@ -69,5 +55,5 @@ case class SparqlUpdateCustomTask( } object SparqlUpdateCustomTask { - final val defaultBatchSize = 1 + private final val defaultBatchSize = 1 } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index 85987dcac6..33dfbe9a35 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -5,7 +5,7 @@ import org.silkframework.entity.EntitySchema import org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlTemplate.{InputProperties, OutputProperties, Row} -import org.silkframework.runtime.templating.{CompiledTemplate, TemplateMethodUsage, TemplateVariableName} +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateEngines, TemplateMethodUsage, TemplateVariableName} import org.silkframework.runtime.validation.ValidationException import java.io.StringWriter @@ -16,7 +16,12 @@ import scala.util.{Failure, Success, Try} */ class SparqlTemplate(template: CompiledTemplate) { - /** Renders the template based on the variable assignments. */ + /** + * Renders the template based on the variable assignments. 
+ * + * @param placeholderAssignments For each placeholder in the query template. + * @param taskProperties The input and output task properties. + * */ def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { val values = scala.collection.mutable.LinkedHashMap[String, AnyRef]() // Flat entity values (used by simple template engine) @@ -135,11 +140,21 @@ class SparqlTemplate(template: CompiledTemplate) { object SparqlTemplate { - final val ROW_VAR_NAME = "row" - final val INPUT_PROPERTIES_VAR_NAME = "inputProperties" - final val OUTPUT_PROPERTIES_VAR_NAME = "outputProperties" + private final val ROW_VAR_NAME = "row" + private final val INPUT_PROPERTIES_VAR_NAME = "inputProperties" + private final val OUTPUT_PROPERTIES_VAR_NAME = "outputProperties" + + private final val templatingVariables = Seq(ROW_VAR_NAME, INPUT_PROPERTIES_VAR_NAME, OUTPUT_PROPERTIES_VAR_NAME) - final val templatingVariables = Seq(ROW_VAR_NAME, INPUT_PROPERTIES_VAR_NAME, OUTPUT_PROPERTIES_VAR_NAME) + /** + * Creates a SPARQL template from a string. + */ + def create(templateEngineId: String, template: String, batchSize: Int): SparqlTemplate = { + val templateEngine = TemplateEngines.create(templateEngineId) + val sparqlTemplate = new SparqlTemplate(templateEngine.compile(template)) + sparqlTemplate.validate(batchSize) + sparqlTemplate + } /** Row API used in SPARQL templates. Represents a single row where input paths are either exactly one value or empty. 
* diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala index de7f2c786d..c88847fe6c 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala @@ -69,7 +69,7 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { } it should "generate the correct SPARQL Update query from the template" in { - SparqlUpdateCustomTask(sparqlUpdateTemplate).generate(Map( + SparqlUpdateCustomTask(sparqlUpdateTemplate).compiledTemplate.generate(Map( "PROP_FROM_ENTITY_SCHEMA1" -> "urn:some:uri", "PROP_FROM_ENTITY_SCHEMA2" -> "the old label", "PROP_FROM_ENTITY_SCHEMA3" -> diff --git a/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala index d123b955de..cd57771422 100644 --- a/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala +++ b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala @@ -2,10 +2,12 @@ package org.silkframework.plugins.templating.velocity import org.apache.velocity.VelocityContext import org.apache.velocity.context.Context +import org.apache.velocity.exception.MethodInvocationException import org.apache.velocity.runtime.parser.node._ import org.apache.velocity.{Template => VelocityTemplate} import 
org.apache.velocity.runtime.RuntimeSingleton import org.silkframework.runtime.plugin.annotations.Plugin +import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateMethodUsage, TemplateVariableName, TemplateVariableValue} import java.io.{StringReader, StringWriter, Writer} @@ -44,7 +46,15 @@ object VelocityTemplateEngine { /** Renders the template with the given context. */ def renderTemplate(template: VelocityTemplate, context: Context): String = { val writer = new StringWriter() - template.merge(context, writer) + try { + template.merge(context, writer) + } catch { + case ex: MethodInvocationException => + ex.getCause match { + case cause: TemplateEvaluationException => throw cause + case _ => throw new TemplateEvaluationException(ex.getMessage, Some(ex)) + } + } writer.toString } } From 4f5ae6f3685d0fc8d64b6411f4acdb6e69016bc4 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 18:01:54 +0100 Subject: [PATCH 20/63] Template bugfixes --- build.sbt | 5 +++-- .../plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala | 2 +- .../templating/SparqlUpdateTemplatingEngineSimple.scala | 2 +- .../dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/build.sbt b/build.sbt index acbe266ea7..546df3a90f 100644 --- a/build.sbt +++ b/build.sbt @@ -164,7 +164,8 @@ lazy val pluginsTemplatingVelocity = (project in file("silk-plugins/silk-plugins ) lazy val pluginsRdf = (project in file("silk-plugins/silk-plugins-rdf")) - .dependsOn(rules, workspace % "test->test;compile->compile", core % "test->test;compile->compile", pluginsCsv % "test->compile", pluginsTemplatingJinja % "test->compile", pluginsTemplatingVelocity % "test->compile") + .dependsOn(rules, workspace % "test->test;compile->compile", core % "test->test;compile->compile", pluginsCsv % "test->compile", + 
pluginsTemplatingJinja % "test->compile", pluginsTemplatingVelocity % "test->compile") .settings(commonSettings *) .settings( name := "Silk Plugins RDF", @@ -380,7 +381,7 @@ lazy val workbenchCore = (project in file("silk-workbench/silk-workbench-core")) lazy val workbenchWorkspace = (project in file("silk-workbench/silk-workbench-workspace")) .enablePlugins(PlayScala) - .dependsOn(workbenchCore % "compile->compile;test->test", pluginsRdf, pluginsCsv % "test->compile", pluginsXml % "test->compile") + .dependsOn(workbenchCore % "compile->compile;test->test", pluginsRdf, pluginsCsv % "test->compile", pluginsXml % "test->compile", pluginsTemplatingJinja % "test->compile") .aggregate(workbenchCore) .settings(commonSettings *) .settings( diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index f2f09cf384..df73727f02 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -33,7 +33,7 @@ case class SparqlUpdateCustomTask( autoCompletionProvider = classOf[TemplateEngineAutocompletionProvider], autoCompleteValueWithLabels = true ) - templatingMode: String = "Jinja" + templatingMode: String = "jinja" ) extends CustomTask { assert(batchSize >= 1, "Batch size must be greater zero!") diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala index 6fabb8e3cb..4acdbe5f68 100644 --- 
a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala @@ -23,7 +23,7 @@ import scala.util.matching.Regex case class SparqlSimpleTemplateEngine() extends TemplateEngine { override def compile(templateString: String): SparqlSimpleCompiledTemplate = { - new SparqlSimpleCompiledTemplate(templateString) + new SparqlSimpleCompiledTemplate(templateString.replace("\r\n", "\n")) } } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala index c88847fe6c..e6a551109d 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala @@ -56,7 +56,7 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { } it should "generate the correct input schema from the template" in { - SparqlUpdateCustomTask(sparqlUpdateTemplate).inputPorts match { + SparqlUpdateCustomTask(sparqlUpdateTemplate, templatingMode = SparqlSimpleTemplateEngine.id).inputPorts match { case FixedNumberOfInputs(Seq(FixedSchemaPort(schema))) => schema.typedPaths.flatMap(_.propertyUri).map(_.uri).toSet mustBe Set( "PROP_FROM_ENTITY_SCHEMA1", @@ -69,7 +69,7 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { } it should "generate the correct SPARQL Update query from the template" in { - SparqlUpdateCustomTask(sparqlUpdateTemplate).compiledTemplate.generate(Map( + SparqlUpdateCustomTask(sparqlUpdateTemplate, 
templatingMode = SparqlSimpleTemplateEngine.id).compiledTemplate.generate(Map( "PROP_FROM_ENTITY_SCHEMA1" -> "urn:some:uri", "PROP_FROM_ENTITY_SCHEMA2" -> "the old label", "PROP_FROM_ENTITY_SCHEMA3" -> From e205e4f7ad1627022f5b7c9345d3e33094100fa7 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 23 Mar 2026 18:46:31 +0100 Subject: [PATCH 21/63] Update ProjectTaskApiTest to use Jinja --- .../test/controllers/workspaceApi/ProjectTaskApiTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala b/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala index 4fd9a9aa84..a1d33651e5 100644 --- a/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala +++ b/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala @@ -94,7 +94,7 @@ class ProjectTaskApiTest extends AnyFlatSpec with SingleProjectWorkspaceProvider val datasetLabel = "In-memory dataset" val customLabel = "Custom SPARQL Update" val transformTask = "transformInContext" - project.addTask(customId, SparqlUpdateCustomTask("insert data {${}

}"), MetaData(Some(customLabel))) + project.addTask(customId, SparqlUpdateCustomTask("INSERT DATA { {{ row.uri(\"PROP_FROM_ENTITY_SCHEMA1\") }}

}"), MetaData(Some(customLabel))) project.addTask(datasetId, DatasetSpec(InMemoryDataset()), metaData = MetaData(Some(datasetLabel))) project.addTask(transformTask, TransformSpec(DatasetSelection(datasetId), output = IdentifierOptionParameter(Some(Identifier(customId))))) val TaskContextResponse(inputTasks, outputTasks, originalInputs, originalOutputs) = taskContext(projectId, transformTask, WorkflowTaskContext( From add4590b34994e81d8c446fcb93466ade592ec2c Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Thu, 26 Mar 2026 14:41:39 +0100 Subject: [PATCH 22/63] Rename SparqlTemplate to SparqlUpdateTemplate to make room for a SparqlSelectTemplate --- .../rdf/tasks/SparqlUpdateCustomTask.scala | 2 +- ...plate.scala => SparqlUpdateTemplate.scala} | 26 +++++++++---------- ...arqlUpdateTemplatingEngineSimpleTest.scala | 2 +- ...la => SparqlUpdateTemplateJinjaTest.scala} | 6 ++--- ...=> SparqlUpdateTemplateVelocityTest.scala} | 8 +++--- 5 files changed, 22 insertions(+), 22 deletions(-) rename silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/{SparqlTemplate.scala => SparqlUpdateTemplate.scala} (89%) rename silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/{SparqlTemplateJinjaTest.scala => SparqlUpdateTemplateJinjaTest.scala} (94%) rename silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/{SparqlTemplateVelocityTest.scala => SparqlUpdateTemplateVelocityTest.scala} (92%) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index df73727f02..088e54b141 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ 
b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -37,7 +37,7 @@ case class SparqlUpdateCustomTask( ) extends CustomTask { assert(batchSize >= 1, "Batch size must be greater zero!") - val compiledTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, sparqlUpdateTemplate.str, batchSize) + val compiledTemplate: SparqlUpdateTemplate = SparqlUpdateTemplate.create(templatingMode, sparqlUpdateTemplate.str, batchSize) def isStaticTemplate: Boolean = compiledTemplate.isStaticTemplate diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplate.scala similarity index 89% rename from silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala rename to silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplate.scala index 33dfbe9a35..26301bedc9 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplate.scala @@ -4,7 +4,7 @@ import org.apache.jena.update.UpdateFactory import org.silkframework.entity.EntitySchema import org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable -import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlTemplate.{InputProperties, OutputProperties, Row} +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlUpdateTemplate.{InputProperties, OutputProperties, Row} import org.silkframework.runtime.templating.{CompiledTemplate, TemplateEngines, TemplateMethodUsage, TemplateVariableName} 
import org.silkframework.runtime.validation.ValidationException @@ -14,7 +14,7 @@ import scala.util.{Failure, Success, Try} /** * Wraps a [[CompiledTemplate]] and adds SPARQL Update specific capabilities. */ -class SparqlTemplate(template: CompiledTemplate) { +class SparqlUpdateTemplate(template: CompiledTemplate) { /** * Renders the template based on the variable assignments. @@ -27,9 +27,9 @@ class SparqlTemplate(template: CompiledTemplate) { // Flat entity values (used by simple template engine) placeholderAssignments.foreach { case (k, v) => values(k) = v } // SPARQL context objects (used by Velocity engine) - values(SparqlTemplate.ROW_VAR_NAME) = Row(placeholderAssignments) - values(SparqlTemplate.INPUT_PROPERTIES_VAR_NAME) = InputProperties(taskProperties.inputTask) - values(SparqlTemplate.OUTPUT_PROPERTIES_VAR_NAME) = OutputProperties(taskProperties.outputTask) + values(SparqlUpdateTemplate.ROW_VAR_NAME) = Row(placeholderAssignments) + values(SparqlUpdateTemplate.INPUT_PROPERTIES_VAR_NAME) = InputProperties(taskProperties.inputTask) + values(SparqlUpdateTemplate.OUTPUT_PROPERTIES_VAR_NAME) = OutputProperties(taskProperties.outputTask) val writer = new StringWriter() template.evaluate(values.toMap, writer) writer.toString @@ -92,13 +92,13 @@ class SparqlTemplate(template: CompiledTemplate) { /** Returns SPARQL-specific variables, extracting paths from method usages. 
*/ private lazy val sparqlVariables: Option[Seq[TemplateVariableName]] = { - val usages = SparqlTemplate.templatingVariables.flatMap(v => template.methodUsages(v)) + val usages = SparqlUpdateTemplate.templatingVariables.flatMap(v => template.methodUsages(v)) if (usages.nonEmpty) { - val rowVars = sparqlMethodUsages(SparqlTemplate.ROW_VAR_NAME) + val rowVars = sparqlMethodUsages(SparqlUpdateTemplate.ROW_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue, "")) - val inputPropVars = sparqlMethodUsages(SparqlTemplate.INPUT_PROPERTIES_VAR_NAME) + val inputPropVars = sparqlMethodUsages(SparqlUpdateTemplate.INPUT_PROPERTIES_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue, "inputProperties")) - val outputPropVars = sparqlMethodUsages(SparqlTemplate.OUTPUT_PROPERTIES_VAR_NAME) + val outputPropVars = sparqlMethodUsages(SparqlUpdateTemplate.OUTPUT_PROPERTIES_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue, "outputProperties")) Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) } else { @@ -113,7 +113,7 @@ class SparqlTemplate(template: CompiledTemplate) { /** Checks if any SPARQL templating variable uses the rawUnsafe method. */ private lazy val usesRawUnsafe: Boolean = { - SparqlTemplate.templatingVariables.exists(varName => + SparqlUpdateTemplate.templatingVariables.exists(varName => sparqlMethodUsages(varName).exists(_.methodName == "rawUnsafe")) } @@ -138,7 +138,7 @@ class SparqlTemplate(template: CompiledTemplate) { } } -object SparqlTemplate { +object SparqlUpdateTemplate { private final val ROW_VAR_NAME = "row" private final val INPUT_PROPERTIES_VAR_NAME = "inputProperties" @@ -149,9 +149,9 @@ object SparqlTemplate { /** * Creates a SPARQL template from a string. 
*/ - def create(templateEngineId: String, template: String, batchSize: Int): SparqlTemplate = { + def create(templateEngineId: String, template: String, batchSize: Int): SparqlUpdateTemplate = { val templateEngine = TemplateEngines.create(templateEngineId) - val sparqlTemplate = new SparqlTemplate(templateEngine.compile(template)) + val sparqlTemplate = new SparqlUpdateTemplate(templateEngine.compile(template)) sparqlTemplate.validate(batchSize) sparqlTemplate } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala index e6a551109d..8f289b9f5f 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala @@ -83,7 +83,7 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { def parse(sparqlUpdateTemplate: String, batchSize: Int = 2): Seq[SparqlUpdateTemplatePart] = { val compiled = SparqlSimpleTemplateEngine().compile(sparqlUpdateTemplate) - new SparqlTemplate(compiled).validate(batchSize) + new SparqlUpdateTemplate(compiled).validate(batchSize) compiled.sparqlUpdateTemplateParts } } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateJinjaTest.scala similarity index 94% rename from silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala rename to 
silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateJinjaTest.scala index b3cbc85dbc..3ffaf533c3 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateJinjaTest.scala @@ -7,7 +7,7 @@ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException -class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { +class SparqlUpdateTemplateJinjaTest extends AnyFlatSpec with Matchers { behavior of "SPARQL templating with the Jinja Template Engine" @@ -115,10 +115,10 @@ class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { } def generate(template: String, assignments: Map[String, String], taskProps: TaskProperties = TaskProperties(Map.empty, Map.empty)): String = { - new SparqlTemplate(JinjaTemplateEngine().compile(template)).generate(assignments, taskProps) + new SparqlUpdateTemplate(JinjaTemplateEngine().compile(template)).generate(assignments, taskProps) } def validate(template: String, batchSize: Int = 2): Unit = { - new SparqlTemplate(JinjaTemplateEngine().compile(template)).validate(batchSize) + new SparqlUpdateTemplate(JinjaTemplateEngine().compile(template)).validate(batchSize) } } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateVelocityTest.scala similarity index 92% rename from silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala rename to 
silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateVelocityTest.scala index 874844b37b..284298ea86 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateVelocityTest.scala @@ -7,7 +7,7 @@ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException -class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { +class SparqlUpdateTemplateVelocityTest extends AnyFlatSpec with Matchers { behavior of "SPARQL templating with the Velocity Template Engine" @@ -79,7 +79,7 @@ class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { """SELECT * WHERE { | $row.uri("uriProp") rdfs:label $row.plainLiteral("stringProp") |}""".stripMargin - val template = new SparqlTemplate(VelocityTemplateEngine().compile(stringTemplate)) + val template = new SparqlUpdateTemplate(VelocityTemplateEngine().compile(stringTemplate)) for(i <- 1 to 10) { val rendered = template.generate(Map("uriProp" -> s"http://entity$i", "stringProp" -> s"some label $i"), TaskProperties(Map.empty, Map.empty)) rendered mustBe @@ -113,10 +113,10 @@ class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { } private def generate(templateString: String, bindings: Map[String, String]): String = { - new SparqlTemplate(VelocityTemplateEngine().compile(templateString)).generate(bindings, TaskProperties(Map.empty, Map.empty)) + new SparqlUpdateTemplate(VelocityTemplateEngine().compile(templateString)).generate(bindings, TaskProperties(Map.empty, Map.empty)) } def validate(template: String, batchSize: Int = 2): Unit = { - new SparqlTemplate(VelocityTemplateEngine().compile(template)).validate(batchSize) + new 
SparqlUpdateTemplate(VelocityTemplateEngine().compile(template)).validate(batchSize) } } From 702be1e870350bfe8c5545ff91aabd847a7f8a86 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Thu, 26 Mar 2026 14:46:59 +0100 Subject: [PATCH 23/63] Added simple templating to SparqlSelectCustomTask and test for it --- .../templating/TemplateVariableValue.scala | 2 +- .../executors/LocalSparqlSelectExecutor.scala | 2 +- .../rdf/tasks/SparqlSelectCustomTask.scala | 16 ++++-- .../templating/SparqlSelectTemplate.scala | 49 +++++++++++++++++++ .../rdf/LocalSparqlUpdateExecutorTest.scala | 12 ++--- .../LocalSparqlSelectExecutorTest.scala | 22 ++++++++- 6 files changed, 90 insertions(+), 13 deletions(-) create mode 100644 silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala index 605eb94485..3fab92d603 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala @@ -7,7 +7,7 @@ package org.silkframework.runtime.templating * @param scope The scope. May be empty. * @param values The values for this variable. 
*/ -class TemplateVariableValue(name: String, scope: String, val values: Seq[String]) extends TemplateVariableName(name, scope) { +class TemplateVariableValue(name: String, scope: String = "", val values: Seq[String]) extends TemplateVariableName(name, scope) { def asName: TemplateVariableName = { new TemplateVariableName(name, scope) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index dcfbce6f12..9b4608cba4 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -48,7 +48,7 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT private def select(sparqlSelectTask: SparqlSelectCustomTask, sparql: SparqlEndpoint, selectLimit: Int) (implicit userContext: UserContext): SparqlResults = { - executeSelect(sparql, sparqlSelectTask.selectQuery.str, selectLimit, Some(sparqlSelectTask.sparqlTimeout)) + executeSelect(sparql, sparqlSelectTask.queryTemplate.evaluate(sparql), selectLimit, Some(sparqlSelectTask.sparqlTimeout)) } /** diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index 47c46d7865..1859e37410 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -1,13 +1,15 @@ package 
org.silkframework.plugins.dataset.rdf.tasks import org.apache.jena.query.QueryFactory -import org.silkframework.config.{CustomTask, FixedNumberOfInputs, FixedSchemaPort, InputPorts, Port} +import org.silkframework.config._ import org.silkframework.dataset.rdf.SparqlEndpointDatasetParameter import org.silkframework.entity._ import org.silkframework.entity.paths.{TypedPath, UntypedPath} import org.silkframework.execution.typed.SparqlEndpointEntitySchema +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlSelectTemplate import org.silkframework.runtime.plugin.annotations.{Param, Plugin} import org.silkframework.runtime.plugin.types.SparqlCodeParameter +import org.silkframework.runtime.templating.TemplateEngineAutocompletionProvider import org.silkframework.runtime.validation.ValidationException import org.silkframework.util.Uri @@ -44,13 +46,21 @@ case class SparqlSelectCustomTask( value = "SPARQL query timeout (select/update) in milliseconds. A value of zero means that there is no timeout set explicitly." + " If a value greater zero is specified this overwrites possible default timeouts." 
) - sparqlTimeout: Int = 0 + sparqlTimeout: Int = 0, + @Param( + value = "The templating mode for the template engine.", + autoCompletionProvider = classOf[TemplateEngineAutocompletionProvider], + autoCompleteValueWithLabels = true + ) + templatingMode: String = "jinja" ) extends CustomTask { val intLimit: Option[Int] = { // Only allow positive ints Try(limit.toInt).filter(_ > 0).toOption } + val queryTemplate: SparqlSelectTemplate = SparqlSelectTemplate.create(templatingMode, selectQuery.str) + override def inputPorts: InputPorts = { FixedNumberOfInputs(Seq(FixedSchemaPort(SparqlEndpointEntitySchema.schema))) } @@ -60,7 +70,7 @@ case class SparqlSelectCustomTask( } val outputSchema: EntitySchema = { - val query = QueryFactory.create(selectQuery.str) + val query = QueryFactory.create(queryTemplate.evaluateWithDefaults()) if (!query.isSelectType) { throw new ValidationException("Query is not a SELECT query!") } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala new file mode 100644 index 0000000000..7d1fd677d8 --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala @@ -0,0 +1,49 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.silkframework.dataset.rdf.SparqlEndpoint +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateEngines, TemplateVariableValue} + +import java.io.StringWriter + +/** + * Wraps a [[CompiledTemplate]] and adds SPARQL SELECT specific capabilities. + */ +class SparqlSelectTemplate(template: CompiledTemplate) { + + /** + * Evaluates the template and returns the SPARQL query. 
+ */ + def evaluate(endpoint: SparqlEndpoint): String = { + evaluateWithVariables(Seq( + new TemplateVariableValue(name = "graph", values = endpoint.sparqlParams.graph.toSeq) + )) + } + + /** + * Evaluates the template using default values for the variables and returns the SPARQL query. + */ + def evaluateWithDefaults(): String = { + evaluateWithVariables(Seq( + new TemplateVariableValue(name = "graph", values = Seq.empty) + )) + } + + private def evaluateWithVariables(variables: Seq[TemplateVariableValue]): String = { + val writer = new StringWriter + template.evaluate(variables, writer) + writer.toString + } + +} + +object SparqlSelectTemplate { + + /** + * Creates a SPARQL template from a string. + */ + def create(templateEngineId: String, template: String): SparqlSelectTemplate = { + val templateEngine = TemplateEngines.create(templateEngineId) + val sparqlTemplate = new SparqlSelectTemplate(templateEngine.compile(template)) + sparqlTemplate + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala index 81c192b838..4beb2ae8a1 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala @@ -1,23 +1,23 @@ package org.silkframework.plugins.dataset.rdf -import org.silkframework.config.{CustomTask, FixedNumberOfInputs, InputPorts, PlainTask, Port, Prefixes, Task} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.must.Matchers +import org.silkframework.config._ import org.silkframework.entity._ import org.silkframework.entity.paths.{TypedPath, UntypedPath} -import org.silkframework.execution.{ExecutionReport, ExecutorOutput} import 
org.silkframework.execution.local.{GenericEntityTable, LocalEntities, LocalExecution} +import org.silkframework.execution.typed.SparqlUpdateEntitySchema +import org.silkframework.execution.{ExecutionReport, ExecutorOutput} import org.silkframework.plugins.dataset.rdf.executors.LocalSparqlUpdateExecutor import org.silkframework.plugins.dataset.rdf.tasks.SparqlUpdateCustomTask import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlSimpleTemplateEngine +import org.silkframework.plugins.templating.velocity.VelocityTemplateEngine import org.silkframework.runtime.activity.{ActivityContext, UserContext} import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, TestPluginContext} import org.silkframework.runtime.validation.ValidationException import org.silkframework.util.{Identifier, TestMocks} import org.silkframework.workspace.TestWorkspaceProviderTestTrait -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.must.Matchers -import org.silkframework.execution.typed.SparqlUpdateEntitySchema -import org.silkframework.plugins.templating.velocity.VelocityTemplateEngine class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestWorkspaceProviderTestTrait { behavior of "Local SPARQL Update Executor" diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala index 7c79beeeb8..78894559fc 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala @@ -63,9 +63,26 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec correctTimeout mustBe true } - private def 
sparqlEndpointStub(selectCallback: SparqlEndpoint => Unit = _ => {}): SparqlEndpoint = { + it should "evaluate a Jinja query template using the graph variable from the SPARQL endpoint" in { + val graphUri = "http://example.org/testGraph" + val query = "SELECT * WHERE { GRAPH <{{ graph ~ \"/data\" }}> { ?s ?p ?o } }" + val task = SparqlSelectCustomTask(query) + var capturedQuery = "" + val activityContextMock = TestMocks.activityContextMock() + val reportUpdater = SparqlSelectExecutionReportUpdater(PlainTask("task", task), activityContextMock) + val sparqlEndpoint = sparqlEndpointStub(graphUri = Some(graphUri), queryCapture = q => capturedQuery = q) + LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, sparqlEndpoint, executionReportUpdater = Some(reportUpdater)).headOption + + task.outputSchema.typedPaths.map(_.toUntypedPath.normalizedSerialization) mustBe IndexedSeq("s", "p", "o") + capturedQuery must include(s"<$graphUri/data>") + capturedQuery must not include "{{ graph" + } + + private def sparqlEndpointStub(selectCallback: SparqlEndpoint => Unit = _ => {}, + graphUri: Option[String] = None, + queryCapture: String => Unit = _ => {}): SparqlEndpoint = { new SparqlEndpoint { - var sparqlParamsIntern = SparqlParams() + var sparqlParamsIntern = SparqlParams(graph = graphUri) override def sparqlParams: SparqlParams = sparqlParamsIntern override def withSparqlParams(sparqlParams: SparqlParams): SparqlEndpoint = { @@ -75,6 +92,7 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec override def select(query: String, limit: Int)(implicit userContext: UserContext): SparqlResults = { selectCallback(this) + queryCapture(query) SparqlResults(Seq("s", "p", "o"), new TraversableIterator[SortedMap[String, RdfNode]] { override def foreach[U](f: SortedMap[String, RdfNode] => U): Unit = { var i = 0 From 242f40cb86a744ba8dfb6bbb6e68a93e4ecf2e6f Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Thu, 26 Mar 2026 14:50:37 +0100 Subject: [PATCH 24/63] Update doc of 
SparqlSelectCustomTask to include templating --- .../rdf/tasks/SparqlSelectCustomTask.md | 19 +++++++++++++++++++ .../rdf/tasks/SparqlSelectCustomTask.scala | 7 ++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index 6883e97366..b816584be4 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -16,6 +16,25 @@ As usual, the SPARQL results contain both "variables" and "bindings", such as in [this example](https://www.w3.org/TR/sparql11-results-json/#json-result-object). This tabular raw form is transformed into an _entity table_. +### Templating + +The select query supports [Jinja](https://jinja.palletsprojects.com/) templating. The following variable is +automatically provided at execution time: + +| Variable | Description | +|----------|-------------| +| `graph` | The named graph URI from the input dataset's graph parameter. Empty if no graph is configured. | + +The `graph` variable can be used directly or combined with Jinja expressions. For example, to query a named graph +whose URI is derived by appending a suffix to the configured graph URI: + +```sparql +SELECT * WHERE { GRAPH <{{ graph ~ "/data" }}> { ?s ?p ?o } } +``` + +The output schema (i.e. the result variables) is derived from the query at configuration time by evaluating the +template with default values (empty `graph`), so the query must remain valid SPARQL regardless of the graph value. 
+ ### Internal Specifics If the SPARQL source is defined on a specific graph, a `FROM` clause will be added to the query at execution time, diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index 1859e37410..dde4915ec8 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -31,7 +31,12 @@ import scala.util.Try iconFile = "sparql-select-query.svg" ) case class SparqlSelectCustomTask( - @Param(label = "Select query", value = "A SPARQL 1.1 select query", example = "select * where { ?s ?p ?o }") + @Param( + label = "Select query", + value = "A SPARQL 1.1 select query. The query supports Jinja templating. " + + "The 'graph' variable is automatically provided from the input dataset's graph parameter. 
" + + "Example with graph: SELECT * WHERE { GRAPH <{{ graph ~ \"/data\" }}> { ?s ?p ?o } }", + example = "select * where { ?s ?p ?o }") selectQuery: SparqlCodeParameter, @Param(label = "Result limit", value = "If set to a positive integer, the number of results is limited") limit: String = "", From 8babc80d51dae18d801afa01163fdfc09d1c7aa7 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Thu, 26 Mar 2026 14:55:20 +0100 Subject: [PATCH 25/63] Update doc of SparqlUpdateCustomTask --- .../rdf/tasks/SparqlUpdateCustomTask.md | 33 ++++++++++++++++--- .../rdf/tasks/SparqlUpdateCustomTask.scala | 2 +- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md index 6106aeec5c..b51f12a43b 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md @@ -12,12 +12,35 @@ _execute_ these queries, we need to connect this task from an input into an outp ## Templating The `sparqlUpdateOperator` plugin uses a **template** in order to construct and output SPARQL update queries. -There are two possible template engines supported by this plugin: a `Simple` engine and +Three template engines are supported: `Jinja` (the default), `Simple`, and [`Velocity Engine`](https://velocity.apache.org/engine/2.4.1/user-guide.html). -Each of these engines supports a different set of templating features, such as for example _variable interpolation_ with -the dollar sign (`$`), i.e. filling in input values via placeholders in the template. +The `Simple` and `Velocity Engine` modes are deprecated. 
-### Example of the `Simple` mode +### Example of the `Jinja` mode + +[Jinja](https://jinja.palletsprojects.com/) is the recommended template engine. It uses `{{ }}` for expressions and +`{% %}` for control flow statements such as conditionals. + +``` +DELETE DATA { {{ row.uri("PROP_FROM_ENTITY_SCHEMA1") }} rdf:label {{ row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") }} } ; +{% if row.exists("PROP_FROM_ENTITY_SCHEMA1") %} + INSERT DATA { {{ row.uri("PROP_FROM_ENTITY_SCHEMA1") }} rdf:label {{ row.plainLiteral("PROP_FROM_ENTITY_SCHEMA3") }} } ; +{% endif %} +``` + +Input values are accessible via methods on the `row` variable: + +- `row.uri(inputPath)`: Renders an input value as **URI**. Throws an exception if the value isn't a valid URI. +- `row.plainLiteral(inputPath)`: Renders an input value as **plain literal**, i.e. it escapes problematic characters. +- `row.rawUnsafe(inputPath)`: Renders an input value as is, i.e. **no escaping** is done. This should **only** be used if the input values can be trusted. +- `row.exists(inputPath)`: Returns `true` if a value for the input path **exists**, else `false`. + +The methods `uri`, `plainLiteral` and `rawUnsafe` throw an exception if no input value is available for the given input path. + +In addition to input values, properties of the input and output tasks can be accessed via the `inputProperties` and +`outputProperties` objects in the same way as the `row` object. For example with `{{ inputProperties.uri("graph") }}`. + +### Example of the `Simple` mode (deprecated) ``` DELETE DATA { ${} rdf:label ${"PROP_FROM_ENTITY_SCHEMA2"} } @@ -32,7 +55,7 @@ Furthermore, it will insert a plain literal serialization for the property value It is also possible to write something like `${"PROP"}^^` or `${"PROP"}@en`. In other words, we can combine variable substitutions with fixed expressions to construct semi-flexible expressions within the template. 
-### Example of the `Velocity Engine` mode +### Example of the `Velocity Engine` mode (deprecated) ``` DELETE DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index 088e54b141..9083abc632 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -21,7 +21,7 @@ case class SparqlUpdateCustomTask( @Param( label = "SPARQL update query", value = "The SPARQL UPDATE template for constructing SPARQL UPDATE queries for every entity from the input." + - " The possible values for the template engine are `Simple`, `Velocity Engine` and `Jinja`." + + " The possible values for the template engine are `Jinja` (default), `Simple` and `Velocity Engine`." + " See the general documentation of this plugin for further details on the features of each template engine.", example = "DELETE DATA { ${} rdf:label ${\"PROP_FROM_ENTITY_SCHEMA2\"} }" ) From 5829a9892fd1e448ac7213c36efd4bd60e1d0493 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Thu, 26 Mar 2026 15:41:52 +0100 Subject: [PATCH 26/63] Updated outdated example. 
--- .../plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index 9083abc632..f785520fa1 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -23,7 +23,7 @@ case class SparqlUpdateCustomTask( value = "The SPARQL UPDATE template for constructing SPARQL UPDATE queries for every entity from the input." + " The possible values for the template engine are `Jinja` (default), `Simple` and `Velocity Engine`." + " See the general documentation of this plugin for further details on the features of each template engine.", - example = "DELETE DATA { ${} rdf:label ${\"PROP_FROM_ENTITY_SCHEMA2\"} }" + example = "INSERT DATA { {{ row.uri(\"PROP_FROM_ENTITY_SCHEMA1\") }} rdf:label {{ row.plainLiteral(\"PROP_FROM_ENTITY_SCHEMA2\") }} } ;" ) sparqlUpdateTemplate: SparqlCodeParameter, @Param(label = "Batch size", value = "How many entities should be handled in a single update request.") From 73ac5ab95691b57c4ecfa0da0a50e0fd4c70ad7c Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Thu, 26 Mar 2026 17:18:28 +0100 Subject: [PATCH 27/63] Fix tests --- .../dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala | 4 ++-- .../test/controllers/workspace/TaskApiTest.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala 
b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala index 78894559fc..c3599935ff 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala @@ -29,8 +29,8 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec val task = SparqlSelectCustomTask("SELECT * WHERE {?s ?p ?o}") val reportUpdater = SparqlSelectExecutionReportUpdater(PlainTask("task", task), activityContextMock) val sparqlEndpoint = new SparqlEndpoint { - override def sparqlParams: SparqlParams = ??? - override def withSparqlParams(sparqlParams: SparqlParams): SparqlEndpoint = ??? + override def sparqlParams: SparqlParams = SparqlParams() + override def withSparqlParams(sparqlParams: SparqlParams): SparqlEndpoint = this override def select(query: String, limit: Int)(implicit userContext: UserContext): SparqlResults = { val entities = for(i <- Iterator.range(0, limit)) yield { diff --git a/silk-workbench/silk-workbench-workspace/test/controllers/workspace/TaskApiTest.scala b/silk-workbench/silk-workbench-workspace/test/controllers/workspace/TaskApiTest.scala index 607b731a0f..756ed0575c 100644 --- a/silk-workbench/silk-workbench-workspace/test/controllers/workspace/TaskApiTest.scala +++ b/silk-workbench/silk-workbench-workspace/test/controllers/workspace/TaskApiTest.scala @@ -362,7 +362,7 @@ class TaskApiTest extends PlaySpec with IntegrationTestTrait with Matchers { p.addAnyTask(sparqlSelect, SparqlSelectCustomTask("SELECT * WHERE {?s ?p ?o}", optionalInputDataset = SparqlEndpointDatasetParameter(inMemoryDataset))) p.addAnyTask(sparqlDataset, DatasetSpec(SparqlDataset("http://endpoint"))) // Check tasks - taskValuesWithLabel(sparqlSelect).filter(_._2.isDefined) mustBe Seq(JsString(inMemoryDataset) -> 
Some(inMemoryDatasetLabel)) + taskValuesWithLabel(sparqlSelect).filter(_._2.isDefined) must contain theSameElementsAs Seq(JsString("jinja") -> Some("Jinja"), JsString(inMemoryDataset) -> Some(inMemoryDatasetLabel)) taskValuesWithLabel(sparqlDataset).filter(_._2.isDefined) mustBe Seq(JsString("parallel") -> Some("parallel")) taskValuesWithLabel(workflowId) // Just check that it returns anything taskValuesWithLabel(linkTaskId) From efd749f588bddf4f052eef84522cc8c2927701f5 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 30 Mar 2026 13:35:29 +0200 Subject: [PATCH 28/63] Moved conversion from entites to variables. --- .../runtime/templating/TemplateEngine.scala | 18 ---------------- .../runtime/templating/TemplateVariable.scala | 2 +- .../TemplateVariableConversions.scala | 21 +++++++++++++++++++ 3 files changed, 22 insertions(+), 19 deletions(-) create mode 100644 silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala index ce6566b986..47882a2e96 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala @@ -50,24 +50,6 @@ trait CompiledTemplate { */ def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig = EvaluationConfig()): Unit - /** - * Evaluates this template using a provided entity. - * - * @throws TemplateEvaluationException If the evaluation failed. - */ - def evaluate(entity: Entity, writer: Writer): Unit = { - evaluate(entityToMap(entity), writer) - } - - /** - * Converts an entity to a sequence of template variables. 
- */ - protected def entityToMap(entity: Entity): Seq[TemplateVariableValue] = { - for((path, value) <- entity.schema.typedPaths zip entity.values if value.nonEmpty) yield { - new TemplateVariableValue(path.normalizedSerialization, "", value) - } - } - /** * Converts template values to a Java Map */ diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala index 78a60f6566..be2f6d5d62 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala @@ -13,7 +13,7 @@ case class TemplateVariable(override val name: String, template: Option[String] = None, description: Option[String] = None, isSensitive: Boolean = false, - override val scope: String) extends TemplateVariableValue(name, scope, values = Seq(value)) { + override val scope: String = "") extends TemplateVariableValue(name, scope, values = Seq(value)) { validate() diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala new file mode 100644 index 0000000000..5a3bb488f5 --- /dev/null +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala @@ -0,0 +1,21 @@ +package org.silkframework.runtime.templating + +import org.silkframework.config.{Task, TaskSpec} +import org.silkframework.entity.Entity + +object TemplateVariableConversions { + + /** + * Converts an entity to a sequence of template variables. 
+ */ + def fromEntity(entity: Entity, scope: String = ""): Seq[TemplateVariableValue] = { + for((path, value) <- entity.schema.typedPaths zip entity.values if value.nonEmpty) yield { + new TemplateVariableValue(path.normalizedSerialization, scope, value) + } + } + + def fromTask(task: Task[_ <: TaskSpec]): Seq[TemplateVariableValue] = { + + } + +} From e706f7edb7c852b4342da4d57e995103f1c4a30d Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 30 Mar 2026 15:35:09 +0200 Subject: [PATCH 29/63] Improved Variable scope: Now it's a sequence instead of a simple string to make empty and hierarchical scopes more explicit. --- .../CombinedTemplateVariablesReader.scala | 2 +- .../GlobalTemplateVariablesConfig.scala | 2 +- .../InMemoryTemplateVariablesReader.scala | 2 +- .../runtime/templating/TemplateEngine.scala | 18 ++++++------------ .../runtime/templating/TemplateVariable.scala | 6 +++--- .../TemplateVariableConversions.scala | 3 ++- .../templating/TemplateVariableName.scala | 16 ++++++---------- .../templating/TemplateVariableScopes.scala | 4 ++-- .../templating/TemplateVariableValue.scala | 2 +- .../templating/TemplateVariablesReader.scala | 2 +- .../templating/TemplateVariableNameTest.scala | 6 +++--- .../templating/SparqlUpdateTemplate.scala | 12 ++++++------ .../SparqlUpdateTemplatingEngineSimple.scala | 2 +- .../templating/jinja/JinjaTemplateEngine.scala | 2 +- .../jinja/JinjaVariableCollector.scala | 17 +++++++++-------- .../templating/jinja/JinjaEngineTest.scala | 2 +- .../velocity/VelocityTemplateEngine.scala | 2 +- .../coreApi/VariableTemplateApi.scala | 6 +++--- .../ProjectTemplateVariablesManager.scala | 2 +- .../workspace/io/WorkspaceIO.scala | 4 ++-- .../workspace/WorkspaceProviderTestTrait.scala | 14 +++++++------- .../xml/XmlZipProjectMarshalingTest.scala | 4 ++-- 22 files changed, 61 insertions(+), 69 deletions(-) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/CombinedTemplateVariablesReader.scala 
b/silk-core/src/main/scala/org/silkframework/runtime/templating/CombinedTemplateVariablesReader.scala index a19f3f105c..872e17761a 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/CombinedTemplateVariablesReader.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/CombinedTemplateVariablesReader.scala @@ -6,7 +6,7 @@ case class CombinedTemplateVariablesReader(readers: Seq[TemplateVariablesReader] /** * The available variable scopes. */ - override def scopes: Set[String] = { + override def scopes: Set[Seq[String]] = { readers.flatMap(_.scopes).toSet } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/GlobalTemplateVariablesConfig.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/GlobalTemplateVariablesConfig.scala index 13edc6c9b1..c24ca7da38 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/GlobalTemplateVariablesConfig.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/GlobalTemplateVariablesConfig.scala @@ -75,7 +75,7 @@ object GlobalTemplateVariables extends TemplateVariablesReader with Serializable /** * The available variable scopes. */ - override def scopes: Set[String] = Set(TemplateVariableScopes.global) + override def scopes: Set[Seq[String]] = Set(TemplateVariableScopes.global) /** * Retrieves all template variables. 
diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/InMemoryTemplateVariablesReader.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/InMemoryTemplateVariablesReader.scala index 399d124fae..b6e700e179 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/InMemoryTemplateVariablesReader.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/InMemoryTemplateVariablesReader.scala @@ -1,3 +1,3 @@ package org.silkframework.runtime.templating -case class InMemoryTemplateVariablesReader(override val all: TemplateVariables, override val scopes: Set[String]) extends TemplateVariablesReader +case class InMemoryTemplateVariablesReader(override val all: TemplateVariables, override val scopes: Set[Seq[String]]) extends TemplateVariablesReader diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala index 47882a2e96..d030876579 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala @@ -54,19 +54,13 @@ trait CompiledTemplate { * Converts template values to a Java Map */ protected def convertValues(value: Seq[TemplateVariableValue]): Map[String, AnyRef] = { - value.groupBy(_.scope).flatMap { case (scope, values) => - if (scope.isEmpty) { - for (value <- values) yield { - (value.name, IterableTemplateValues.fromValues(value.values)) - } - } else { - val nestedValues = - for (value <- values) yield { - (value.name, IterableTemplateValues.fromValues(value.values)) - } - Seq((scope, nestedValues.toMap.asJava)) - } + val (flatVars, scopedVars) = value.partition(_.scope.isEmpty) + val flatEntries = flatVars.map(v => v.name -> IterableTemplateValues.fromValues(v.values).asInstanceOf[AnyRef]) + val scopedEntries = scopedVars.groupBy(_.scope.head).map { case 
(topScope, vars) => + val shallowVars = vars.map(v => new TemplateVariableValue(v.name, v.scope.tail, v.values)) + topScope -> convertValues(shallowVars).asJava.asInstanceOf[AnyRef] } + (flatEntries ++ scopedEntries).toMap } } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala index be2f6d5d62..548b6ba164 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala @@ -13,7 +13,7 @@ case class TemplateVariable(override val name: String, template: Option[String] = None, description: Option[String] = None, isSensitive: Boolean = false, - override val scope: String = "") extends TemplateVariableValue(name, scope, values = Seq(value)) { + override val scope: Seq[String] = Seq.empty) extends TemplateVariableValue(name, scope, values = Seq(value)) { validate() @@ -49,14 +49,14 @@ object TemplateVariable { template = Option((value \ "Template").text).filter(_.trim.nonEmpty), description = Option((value \ "Description").text).filter(_.trim.nonEmpty), isSensitive = (value \ "@isSensitive").text.toBoolean, - scope = (value \ "@scope").text, + scope = (value \ "@scope").text.split('.').filter(_.nonEmpty).toSeq, ) } override def write(value: TemplateVariable)(implicit writeContext: WriteContext[Node]): Node = { + scope={value.scope.mkString(".")}> {PCData(value.value)} { value.template.toSeq.map(template => ) } {value.description.getOrElse("")} diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala index 5a3bb488f5..e27166b52e 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala +++ 
b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala @@ -2,13 +2,14 @@ package org.silkframework.runtime.templating import org.silkframework.config.{Task, TaskSpec} import org.silkframework.entity.Entity +import org.silkframework.runtime.plugin.{AnyPlugin, ParameterValues, PluginContext, PluginDescription, PluginObjectParameterTypeTrait, StringParameterType} object TemplateVariableConversions { /** * Converts an entity to a sequence of template variables. */ - def fromEntity(entity: Entity, scope: String = ""): Seq[TemplateVariableValue] = { + def fromEntity(entity: Entity, scope: Seq[String] = Seq.empty): Seq[TemplateVariableValue] = { for((path, value) <- entity.schema.typedPaths zip entity.values if value.nonEmpty) yield { new TemplateVariableValue(path.normalizedSerialization, scope, value) } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala index 98f6c92dbf..cd6b6a1dbf 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala @@ -6,17 +6,13 @@ package org.silkframework.runtime.templating * @param name The local name of the variable. * @param scope The scope. May be empty. */ -class TemplateVariableName(val name: String, val scope: String) { +class TemplateVariableName(val name: String, val scope: Seq[String] = Seq.empty) { /** * The variable name including its scope, e.g., `project.var` */ def scopedName: String = { - if (scope.nonEmpty) { - scope + "." 
+ name - } else { - name - } + (scope :+ name).mkString(".") } override def toString: String = { @@ -37,11 +33,11 @@ class TemplateVariableName(val name: String, val scope: String) { object TemplateVariableName { def parse(fullName: String): TemplateVariableName = { - val pointIndex = fullName.indexOf('.'.toInt) - if(pointIndex != -1) { - new TemplateVariableName(fullName.substring(pointIndex + 1), fullName.substring(0, pointIndex)) + val parts = fullName.split('.') + if (parts.length > 1) { + new TemplateVariableName(parts.last, parts.dropRight(1).toSeq) } else { - new TemplateVariableName(fullName, "") + new TemplateVariableName(fullName, Seq.empty) } } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala index 62c016322f..b075800568 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala @@ -5,11 +5,11 @@ object TemplateVariableScopes { /** * Global variables. */ - final val global = "global" + final val global: Seq[String] = Seq("global") /** * Project variables. */ - final val project = "project" + final val project: Seq[String] = Seq("project") } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala index 3fab92d603..c545322a0c 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala @@ -7,7 +7,7 @@ package org.silkframework.runtime.templating * @param scope The scope. May be empty. * @param values The values for this variable. 
*/ -class TemplateVariableValue(name: String, scope: String = "", val values: Seq[String]) extends TemplateVariableName(name, scope) { +class TemplateVariableValue(name: String, scope: Seq[String] = Seq.empty, val values: Seq[String]) extends TemplateVariableName(name, scope) { def asName: TemplateVariableName = { new TemplateVariableName(name, scope) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala index e7aa0ce863..7a60cf1096 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala @@ -11,7 +11,7 @@ trait TemplateVariablesReader { /** * The available variable scopes. */ - def scopes: Set[String] + def scopes: Set[Seq[String]] /** * Retrieves all template variables. diff --git a/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableNameTest.scala b/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableNameTest.scala index bde9114c2d..ca802c2bc7 100644 --- a/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableNameTest.scala +++ b/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableNameTest.scala @@ -8,9 +8,9 @@ class TemplateVariableNameTest extends AnyFlatSpec with Matchers { behavior of "TemplateVariableName" it should "parse full names" in { - TemplateVariableName.parse("project.var") shouldBe new TemplateVariableName("var", "project") - TemplateVariableName.parse("var") shouldBe new TemplateVariableName("var", "") - TemplateVariableName.parse("a.b.c") shouldBe new TemplateVariableName("b.c", "a") + TemplateVariableName.parse("project.var") shouldBe new TemplateVariableName("var", Seq("project")) + TemplateVariableName.parse("var") shouldBe new TemplateVariableName("var", 
Seq.empty) + TemplateVariableName.parse("a.b.c") shouldBe new TemplateVariableName("c", Seq("a", "b")) } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplate.scala index 26301bedc9..999eaa4f8f 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplate.scala @@ -44,8 +44,8 @@ class SparqlUpdateTemplate(template: CompiledTemplate) { val genericUri = "urn:generic:1" val entityVariables = entityVariableNames val assignments = entityVariables.map(_ -> genericUri).toMap - val inputPropVars = taskPropertyVariableNames("inputProperties").map(_ -> genericUri).toMap - val outputPropVars = taskPropertyVariableNames("outputProperties").map(_ -> genericUri).toMap + val inputPropVars = taskPropertyVariableNames(Seq("inputProperties")).map(_ -> genericUri).toMap + val outputPropVars = taskPropertyVariableNames(Seq("outputProperties")).map(_ -> genericUri).toMap val taskProps = TaskProperties(inputPropVars, outputPropVars) val sparqlQuery = Try(generate(assignments, taskProps)) match { case Failure(exception) => @@ -95,11 +95,11 @@ class SparqlUpdateTemplate(template: CompiledTemplate) { val usages = SparqlUpdateTemplate.templatingVariables.flatMap(v => template.methodUsages(v)) if (usages.nonEmpty) { val rowVars = sparqlMethodUsages(SparqlUpdateTemplate.ROW_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue, "")) + .map(u => new TemplateVariableName(u.parameterValue)) val inputPropVars = sparqlMethodUsages(SparqlUpdateTemplate.INPUT_PROPERTIES_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue, "inputProperties")) + .map(u => new 
TemplateVariableName(u.parameterValue, Seq("inputProperties"))) val outputPropVars = sparqlMethodUsages(SparqlUpdateTemplate.OUTPUT_PROPERTIES_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue, "outputProperties")) + .map(u => new TemplateVariableName(u.parameterValue, Seq("outputProperties"))) Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) } else { template.variables @@ -128,7 +128,7 @@ class SparqlUpdateTemplate(template: CompiledTemplate) { } /** Returns variable names for a specific scope (e.g. "inputProperties", "outputProperties"). */ - private def taskPropertyVariableNames(scope: String): Seq[String] = { + private def taskPropertyVariableNames(scope: Seq[String]): Seq[String] = { sparqlVariables match { case Some(vars) => vars.filter(_.scope == scope).map(_.name).distinct diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala index 4acdbe5f68..a6e1668ccc 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala @@ -40,7 +40,7 @@ object SparqlSimpleTemplateEngine { class SparqlSimpleCompiledTemplate(val sparqlUpdateTemplate: String) extends CompiledTemplate { override lazy val variables: Option[Seq[TemplateVariableName]] = Some( - properties.map(p => new TemplateVariableName(p, "")) + properties.map(p => new TemplateVariableName(p, Seq.empty)) ) override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { diff --git 
a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala index 15288387d3..5aa3237145 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala @@ -92,7 +92,7 @@ class JinjaTemplate(val node: Node) extends CompiledTemplate { // Collect all scoped variables of the form 'scope.name' val names = values.map(_.asName) // Variables of the form 'scope.name' can also be addressed as just 'scope' - val scopes = values.map(v => new TemplateVariableName(v.scope, "")) + val scopes = values.filter(_.scope.nonEmpty).map(v => new TemplateVariableName(v.scope.mkString("."), Seq.empty)) // Find missing vars val existingVars = (names ++ scopes).toSet missingVars = vars.filterNot(existingVars.contains) diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala index ff1c263898..6188d13a2f 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala @@ -51,7 +51,7 @@ class JinjaVariableCollector { val loopVars = new HelperStringTokenizer(parts(0)).splitComma(true).allTokens val loopedVars = collectFromExpression(parts(1)) val childVars = collectFromChildren(tagNode, scope.withBoundNames(loopVars.asScala.toSeq)) - val filtedChildVars = 
childVars.unboundVars.filterNot(v => v.scope == "loop" || v.name == "loop" ) + val filtedChildVars = childVars.unboundVars.filterNot(v => v.scope == Seq("loop") || v.name == "loop" ) loopedVars.withUnbound(filtedChildVars) } else { collectFromChildren(tagNode, scope) @@ -95,17 +95,18 @@ class JinjaVariableCollector { val tree = builder.build(EXPRESSION_START_TOKEN + expression + EXPRESSION_END_TOKEN) // Manually treat simple expressions of the form `project.variable` or `variable.method(...)` expression match { - case JinjaVariableCollector.scopedName(scope, name) => + case JinjaVariableCollector.scopedName(scopePart, name) => + val scope = scopePart.dropRight(1).split('.').toSeq Scope( unboundVars = Seq(new TemplateVariableName(name, scope)) ) case JinjaVariableCollector.methodCallOnVar(varName) => Scope( - unboundVars = Seq(new TemplateVariableName(varName, "")) + unboundVars = Seq(new TemplateVariableName(varName, Seq.empty)) ) case _ => Scope( - unboundVars = tree.getIdentifierNodes.asScala.map(_.getName).filterNot(ignoreIdentifierNode).toSeq.map(new TemplateVariableName(_, "")) + unboundVars = tree.getIdentifierNodes.asScala.map(_.getName).filterNot(ignoreIdentifierNode).toSeq.map(new TemplateVariableName(_, Seq.empty)) ) } } catch { @@ -113,7 +114,7 @@ class JinjaVariableCollector { // Fallback: try to extract the leading variable from method call expressions like `var.method(...)` expression match { case JinjaVariableCollector.methodCallOnVar(varName) => - Scope(unboundVars = Seq(new TemplateVariableName(varName, ""))) + Scope(unboundVars = Seq(new TemplateVariableName(varName, Seq.empty))) case _ => Scope.empty } @@ -132,7 +133,7 @@ class JinjaVariableCollector { case class Scope(unboundVars: Seq[TemplateVariableName], boundVars: Seq[TemplateVariableName] = Seq.empty) { def withBoundNames(varNames: Seq[String]): Scope = { - withBound(varNames.map(new TemplateVariableName(_, ""))) + withBound(varNames.map(new TemplateVariableName(_, Seq.empty))) } def 
withBound(varNames: Seq[TemplateVariableName]): Scope = { @@ -167,8 +168,8 @@ object JinjaVariableCollector { // Regex for valid variable names private val variableRegex = "[a-zA-Z_][a-zA-Z0-9_]*".r - // Regex for scoped names of the form scope.var - private val scopedName = s"($variableRegex)\\.($variableRegex)".r + // Regex for scoped names of the form scope1[.scope2]*.var + private val scopedName = s"((?:$variableRegex\\.)+)($variableRegex)".r // Regex for method calls on a variable of the form var.method(...) private val methodCallOnVar = s"($variableRegex)\\.$variableRegex\\(.*\\)".r diff --git a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaEngineTest.scala b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaEngineTest.scala index 37cde35ce7..3980b764ab 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaEngineTest.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaEngineTest.scala @@ -169,7 +169,7 @@ class JinjaEngineTest extends AnyFlatSpec with Matchers { val compileTemplate = JinjaTemplateEngine().compile(template) val templateValues = for((name, value) <- values.toSeq) yield { - new TemplateVariableValue(name, "", value) + new TemplateVariableValue(name, Seq.empty, value) } compileTemplate.evaluate(templateValues, writer) writer.toString diff --git a/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala index cd57771422..3ddad7ef76 100644 --- a/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala +++ 
b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala @@ -68,7 +68,7 @@ class VelocityCompiledTemplate(val templateString: String) extends CompiledTempl override lazy val variables: Option[Seq[TemplateVariableName]] = { Some(extractVariableReferences(velocityTemplate.getData.asInstanceOf[SimpleNode]) - .map(name => new TemplateVariableName(name, "")).distinct) + .map(name => new TemplateVariableName(name, Seq.empty)).distinct) } override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { diff --git a/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala b/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala index 3096be3da9..647319d607 100644 --- a/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala +++ b/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala @@ -15,7 +15,7 @@ import io.swagger.v3.oas.annotations.tags.Tag import io.swagger.v3.oas.annotations.{Operation, Parameter} import org.silkframework.runtime.templating.exceptions._ import org.silkframework.runtime.templating.operations.{DeleteVariableModification, UpdateVariableModification, UpdateVariablesModification} -import org.silkframework.runtime.templating.{GlobalTemplateVariables, TemplateVariable, TemplateVariables} +import org.silkframework.runtime.templating.{GlobalTemplateVariables, TemplateVariable, TemplateVariableScopes, TemplateVariables} import org.silkframework.runtime.validation.BadUserInputException import org.silkframework.workspace.WorkspaceFactory import play.api.libs.json.{JsValue, Json, OFormat} @@ -333,7 +333,7 @@ class VariableTemplateApi @Inject()() extends InjectedController with UserContex val dependencyErrors = ex.issues.collect { case TemplateVariableEvaluationException(dependentVar, unboundEx: 
UnboundVariablesException) => - (dependentVar.name, unboundEx.missingVars.filter(_.scope == "project").map(_.name)) + (dependentVar.name, unboundEx.missingVars.filter(_.scope == TemplateVariableScopes.project).map(_.name)) }.filter(_._2.nonEmpty).toMap if(dependencyErrors.nonEmpty) { throw new CannotReorderVariablesException(dependencyErrors) @@ -449,7 +449,7 @@ object VariableTemplateApi { example = "project", requiredMode = RequiredMode.REQUIRED ) - scope: String) { + scope: Seq[String]) { def convert: TemplateVariable = { if (value.isEmpty && template.isEmpty) { throw new BadUserInputException("Either the variable value or its template has to be defined.") diff --git a/silk-workspace/src/main/scala/org/silkframework/workspace/ProjectTemplateVariablesManager.scala b/silk-workspace/src/main/scala/org/silkframework/workspace/ProjectTemplateVariablesManager.scala index 5ecb077291..aca3a89aa5 100644 --- a/silk-workspace/src/main/scala/org/silkframework/workspace/ProjectTemplateVariablesManager.scala +++ b/silk-workspace/src/main/scala/org/silkframework/workspace/ProjectTemplateVariablesManager.scala @@ -19,7 +19,7 @@ class ProjectTemplateVariablesManager(serializer: TemplateVariablesSerializer, l /** * The available variable scopes. */ - def scopes: Set[String] = GlobalTemplateVariables.scopes + projectScope + def scopes: Set[Seq[String]] = GlobalTemplateVariables.scopes + projectScope /** * Retrieves all template variables. 
diff --git a/silk-workspace/src/main/scala/org/silkframework/workspace/io/WorkspaceIO.scala b/silk-workspace/src/main/scala/org/silkframework/workspace/io/WorkspaceIO.scala index ebe77e3dee..6150bbc312 100644 --- a/silk-workspace/src/main/scala/org/silkframework/workspace/io/WorkspaceIO.scala +++ b/silk-workspace/src/main/scala/org/silkframework/workspace/io/WorkspaceIO.scala @@ -6,7 +6,7 @@ import org.silkframework.rule.{LinkSpec, TransformSpec} import org.silkframework.runtime.activity.UserContext import org.silkframework.runtime.plugin.PluginContext import org.silkframework.runtime.resource.ResourceManager -import org.silkframework.runtime.templating.{CombinedTemplateVariablesReader, GlobalTemplateVariables, InMemoryTemplateVariablesReader, TemplateVariables} +import org.silkframework.runtime.templating.{CombinedTemplateVariablesReader, GlobalTemplateVariables, InMemoryTemplateVariablesReader, TemplateVariableScopes, TemplateVariables} import org.silkframework.util.Identifier import org.silkframework.workspace.activity.workflow.Workflow import org.silkframework.workspace.resources.ResourceRepository @@ -88,7 +88,7 @@ object WorkspaceIO { prefixes: Prefixes, variables: TemplateVariables) (implicit userContext: UserContext): Unit = { - val variablesReader = CombinedTemplateVariablesReader(Seq(GlobalTemplateVariables, InMemoryTemplateVariablesReader(variables, Set("project")))) + val variablesReader = CombinedTemplateVariablesReader(Seq(GlobalTemplateVariables, InMemoryTemplateVariablesReader(variables, Set(TemplateVariableScopes.project)))) implicit val inputContext: PluginContext = PluginContext(resources = inputResources, prefixes = prefixes, user = userContext, templateVariables = variablesReader) for(taskTry <- inputWorkspace.readTasks[T](projectName)) { taskTry.taskOrError match { diff --git a/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala 
b/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala index 8cc03e705c..f3838ff3da 100644 --- a/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala +++ b/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala @@ -20,7 +20,7 @@ import org.silkframework.runtime.activity.{SimpleUserContext, UserContext} import org.silkframework.runtime.plugin.annotations.Plugin import org.silkframework.runtime.plugin._ import org.silkframework.runtime.resource.ResourceNotFoundException -import org.silkframework.runtime.templating.{TemplateVariable, TemplateVariables} +import org.silkframework.runtime.templating.{TemplateVariable, TemplateVariableScopes, TemplateVariables} import org.silkframework.runtime.users.DefaultUserManager import org.silkframework.util.{Identifier, MockitoSugar, Uri} import org.silkframework.workspace.WorkspaceProviderTestPlugins.{FailingCustomTask, FailingTaskException} @@ -690,9 +690,9 @@ trait WorkspaceProviderTestTrait extends AnyFlatSpec with Matchers with MockitoS // Add variables and read again val templateVariables1 = TemplateVariables(Seq( - TemplateVariable("myVar1", "myValue1", None, None, isSensitive = false, "project"), - TemplateVariable("myVar2", "myValue2", None, Some("test description"), isSensitive = true, "project"), - TemplateVariable("myVar3", "myValue2b", Some("{{project.myVar2}}b"), None, isSensitive = true, "project") + TemplateVariable("myVar1", "myValue1", None, None, isSensitive = false, TemplateVariableScopes.project), + TemplateVariable("myVar2", "myValue2", None, Some("test description"), isSensitive = true, TemplateVariableScopes.project), + TemplateVariable("myVar3", "myValue2b", Some("{{project.myVar2}}b"), None, isSensitive = true, TemplateVariableScopes.project) )) variables.putVariables(templateVariables1) refreshTest { @@ -701,9 +701,9 @@ trait WorkspaceProviderTestTrait extends AnyFlatSpec with Matchers with 
MockitoS // Modify variables and read again val templateVariables2 = TemplateVariables(Seq( - TemplateVariable("myVar2", "myValue2", None, Some("test description 2"), isSensitive = true, "project"), - TemplateVariable("myVar4", "myValue2b", Some("{{project.myVar2}}b"), None, isSensitive = true, "project"), - TemplateVariable("myVar1", "myValue1", None, None, isSensitive = false, "project") + TemplateVariable("myVar2", "myValue2", None, Some("test description 2"), isSensitive = true, TemplateVariableScopes.project), + TemplateVariable("myVar4", "myValue2b", Some("{{project.myVar2}}b"), None, isSensitive = true, TemplateVariableScopes.project), + TemplateVariable("myVar1", "myValue1", None, None, isSensitive = false, TemplateVariableScopes.project) )) variables.putVariables(templateVariables2) refreshTest { diff --git a/silk-workspace/src/test/scala/org/silkframework/workspace/xml/XmlZipProjectMarshalingTest.scala b/silk-workspace/src/test/scala/org/silkframework/workspace/xml/XmlZipProjectMarshalingTest.scala index 80692432c6..ec66d292bd 100644 --- a/silk-workspace/src/test/scala/org/silkframework/workspace/xml/XmlZipProjectMarshalingTest.scala +++ b/silk-workspace/src/test/scala/org/silkframework/workspace/xml/XmlZipProjectMarshalingTest.scala @@ -10,7 +10,7 @@ import org.silkframework.runtime.activity.UserContext import org.silkframework.runtime.plugin.annotations.Plugin import org.silkframework.runtime.plugin.{PluginContext, PluginRegistry, TestPluginContext} import org.silkframework.runtime.resource._ -import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, InMemoryTemplateVariablesReader, TemplateEngine, TemplateVariableValue} +import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, InMemoryTemplateVariablesReader, TemplateEngine, TemplateVariableScopes, TemplateVariableValue} import org.silkframework.util.{ConfigTestTrait, Uri} import org.silkframework.workspace.resources.InMemoryResourceRepository import 
org.silkframework.workspace.{InMemoryWorkspaceProvider, Workspace} @@ -103,7 +103,7 @@ class XmlZipProjectMarshalingTest extends AnyFlatSpec with Matchers with ConfigT variables.map("linkLimitTimesTen").value shouldBe "10000" implicit val pluginContext: PluginContext = TestPluginContext(prefixes = Prefixes.default, resources = resources, - templateVariables = InMemoryTemplateVariablesReader(variables, Set("project"))) + templateVariables = InMemoryTemplateVariablesReader(variables, Set(TemplateVariableScopes.project))) // Datasets val datasets = workspace.provider.readTasks[GenericDatasetSpec](projectName) From f0e37e312d83ce248187242abd341f139733326f Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 30 Mar 2026 15:38:38 +0200 Subject: [PATCH 30/63] Updated Scaladoc for the variable scope to make it more clear. --- .../runtime/templating/TemplateEngine.scala | 5 ++++- .../runtime/templating/TemplateVariable.scala | 9 +++++++++ .../runtime/templating/TemplateVariableName.scala | 12 ++++++++++-- .../runtime/templating/TemplateVariableScopes.scala | 8 ++++++-- .../runtime/templating/TemplateVariableValue.scala | 2 +- .../runtime/templating/TemplateVariablesReader.scala | 3 ++- .../workspaceApi/coreApi/VariableTemplateApi.scala | 3 +-- 7 files changed, 33 insertions(+), 9 deletions(-) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala index d030876579..a9a1804912 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala @@ -51,7 +51,10 @@ trait CompiledTemplate { def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig = EvaluationConfig()): Unit /** - * Converts template values to a Java Map + * Converts template variable values to a nested Java-compatible map. 
+ * Variables with an empty scope are placed at the top level. + * Variables with a scope are placed in nested maps corresponding to each scope element, + * e.g., scope Seq("project", "meta") produces Map("project" -> Map("meta" -> Map(name -> value))). */ protected def convertValues(value: Seq[TemplateVariableValue]): Map[String, AnyRef] = { val (flatVars, scopedVars) = value.partition(_.scope.isEmpty) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala index 548b6ba164..501dd070a2 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala @@ -7,6 +7,15 @@ import scala.xml.{Node, PCData} /** * A single template variable. + * + * @param name The local name of the variable. + * @param value The value of the variable. + * @param template Optional template expression to compute the value dynamically. + * @param description Optional description for documentation. + * @param isSensitive True if the variable value should not be exposed to users. + * @param scope The scope as a sequence of strings forming a prefix path. May be empty. + * For example, a variable with name "label" and scope Seq("project", "metaData") + * is addressed as "project.metaData.label". 
*/ case class TemplateVariable(override val name: String, value: String, diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala index cd6b6a1dbf..ff71d6d23b 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala @@ -4,12 +4,15 @@ package org.silkframework.runtime.templating * Holds the full name of a template variable including it's scope. * * @param name The local name of the variable. - * @param scope The scope. May be empty. + * @param scope The scope as a sequence of strings forming a prefix path. May be empty. + * For example, a variable with name "label" and scope Seq("project", "metaData") + * is addressed as "project.metaData.label". */ class TemplateVariableName(val name: String, val scope: Seq[String] = Seq.empty) { /** - * The variable name including its scope, e.g., `project.var` + * The variable name including its scope as a dot-separated string, e.g., `project.var` or `project.metaData.var`. + * If the scope is empty, this is just the local name. */ def scopedName: String = { (scope :+ name).mkString(".") @@ -32,6 +35,11 @@ class TemplateVariableName(val name: String, val scope: Seq[String] = Seq.empty) object TemplateVariableName { + /** + * Parses a dot-separated full variable name into a [[TemplateVariableName]]. + * All segments except the last form the scope; the last segment is the local name. + * For example, "project.metaData.label" parses to name="label", scope=Seq("project","metaData"). 
+ */ def parse(fullName: String): TemplateVariableName = { val parts = fullName.split('.') if (parts.length > 1) { diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala index b075800568..15fe4ca42c 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala @@ -1,14 +1,18 @@ package org.silkframework.runtime.templating +/** + * Predefined variable scopes. Each scope is a sequence of strings that forms a prefix path + * used to address variables, e.g., a variable "label" in scope Seq("project") is addressed as "project.label". + */ object TemplateVariableScopes { /** - * Global variables. + * Scope for global variables, addressed as "global.variableName". */ final val global: Seq[String] = Seq("global") /** - * Project variables. + * Scope for project variables, addressed as "project.variableName". */ final val project: Seq[String] = Seq("project") diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala index c545322a0c..13fa368451 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala @@ -4,7 +4,7 @@ package org.silkframework.runtime.templating * Holds the full name and value of a template variable. * * @param name The local name of the variable. - * @param scope The scope. May be empty. + * @param scope The scope as a sequence of strings forming a prefix path. May be empty. * @param values The values for this variable. 
*/ class TemplateVariableValue(name: String, scope: Seq[String] = Seq.empty, val values: Seq[String]) extends TemplateVariableName(name, scope) { diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala index 7a60cf1096..0dce817eb5 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala @@ -9,7 +9,8 @@ import org.silkframework.runtime.validation.NotFoundException trait TemplateVariablesReader { /** - * The available variable scopes. + * The available variable scopes. Each scope is represented as a sequence of strings forming a prefix path, + * e.g., Seq("project") or Seq("project", "metaData"). */ def scopes: Set[Seq[String]] diff --git a/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala b/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala index 647319d607..024955f043 100644 --- a/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala +++ b/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala @@ -445,8 +445,7 @@ object VariableTemplateApi { ) isSensitive: Boolean, @Schema( - description = "The scope of the variable.", - example = "project", + description = "The scope of the variable as a sequence of strings forming a prefix path, e.g. 
[\"project\"] or [\"project\", \"metaData\"].",
 requiredMode = RequiredMode.REQUIRED )
 scope: Seq[String]) { From 5de4a6aca0ffdbb47c42855ae1acb4778c8b3feb Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 30 Mar 2026 15:40:08 +0200 Subject: [PATCH 31/63] Add conversions for TemplateVariables --- .../TemplateVariableConversions.scala | 26 ++++++++++++++++--- .../templating/TemplateVariableTest.scala | 2 +- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala index e27166b52e..3e8e6d238c 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala @@ -2,12 +2,15 @@ package org.silkframework.runtime.templating import org.silkframework.config.{Task, TaskSpec} import org.silkframework.entity.Entity -import org.silkframework.runtime.plugin.{AnyPlugin, ParameterValues, PluginContext, PluginDescription, PluginObjectParameterTypeTrait, StringParameterType} +import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, PluginObjectParameterTypeTrait, StringParameterType} object TemplateVariableConversions { /** * Converts an entity to a sequence of template variables. + * + * @param entity The entity to convert. + * @param scope The scope to assign to all resulting variables. */ def fromEntity(entity: Entity, scope: Seq[String] = Seq.empty): Seq[TemplateVariableValue] = { for((path, value) <- entity.schema.typedPaths zip entity.values if value.nonEmpty) yield { @@ -15,8 +18,25 @@ object TemplateVariableConversions { } } - def fromTask(task: Task[_ <: TaskSpec]): Seq[TemplateVariableValue] = { - + /** + * Converts a task's parameters to a sequence of template variables.
+ * Nested plugin parameters are placed into nested scopes using the parameter key. + * + * @param task The task whose parameters to convert. + * @param scope The base scope. Nested parameters extend this scope with the parameter key. + */ + def fromTask(task: Task[_ <: TaskSpec], scope: Seq[String] = Seq.empty)(implicit pluginContext: PluginContext): Seq[TemplateVariableValue] = { + fromPluginParameters(task.data.parameters, scope) } + private def fromPluginParameters(values: ParameterValues, scope: Seq[String] = Seq.empty)(implicit pluginContext: PluginContext): Seq[TemplateVariableValue] = { + for((key, value) <- values.values) yield { + value match { + case _: StringParameterType[_] => + Seq(new TemplateVariableValue(key, scope, Seq(value.toString))) + case pt: PluginObjectParameterTypeTrait => + fromPluginParameters(pt.parameters, scope :+ key) + } + } + }.flatten.toSeq } diff --git a/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableTest.scala b/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableTest.scala index 6f6e9a88a5..3974901af2 100644 --- a/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableTest.scala +++ b/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableTest.scala @@ -19,6 +19,6 @@ class TemplateVariableTest extends AnyFlatSpec with Matchers { } } - private def variableName(name: String) = TemplateVariable(name, "test value", None, None, isSensitive = false, "testScope") + private def variableName(name: String) = TemplateVariable(name, "test value", None, None, isSensitive = false, Seq("testScope")) } From ee22cef5412493acc222846dfd81a5b5e20b5f62 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 30 Mar 2026 17:07:47 +0200 Subject: [PATCH 32/63] Add full input task to SparqlSelectTemplate --- .../TemplateVariableConversions.scala | 16 ++++---- .../executors/LocalSparqlSelectExecutor.scala | 21 +++++----- 
.../templating/SparqlSelectTemplate.scala | 25 ++++++++---- .../LocalSparqlSelectExecutorTest.scala | 38 ++++++++++++++----- .../jinja/JinjaTemplateEngine.scala | 6 ++- .../jinja/JinjaVariableCollector.scala | 18 +++++++-- .../jinja/JinjaVariableCollectorTest.scala | 5 +++ .../transform/PeakTransformApi.scala | 6 +-- 8 files changed, 92 insertions(+), 43 deletions(-) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala index 3e8e6d238c..a74eddf02a 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala @@ -2,7 +2,7 @@ package org.silkframework.runtime.templating import org.silkframework.config.{Task, TaskSpec} import org.silkframework.entity.Entity -import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, PluginObjectParameterTypeTrait, StringParameterType} +import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, SimpleParameterValue} object TemplateVariableConversions { @@ -25,17 +25,19 @@ object TemplateVariableConversions { * @param task The task whose parameters to convert. * @param scope The base scope. Nested parameters extend this scope with the parameter key. 
*/ - def fromTask(task: Task[_ <: TaskSpec], scope: Seq[String] = Seq.empty)(implicit pluginContext: PluginContext): Seq[TemplateVariableValue] = { - fromPluginParameters(task.data.parameters, scope) + def fromTask(task: Task[_ <: TaskSpec], scope: Seq[String] = Seq("task"))(implicit pluginContext: PluginContext): Seq[TemplateVariableValue] = { + fromPluginParameters(task.data.parameters, scope :+ "parameters") } private def fromPluginParameters(values: ParameterValues, scope: Seq[String] = Seq.empty)(implicit pluginContext: PluginContext): Seq[TemplateVariableValue] = { for((key, value) <- values.values) yield { value match { - case _: StringParameterType[_] => - Seq(new TemplateVariableValue(key, scope, Seq(value.toString))) - case pt: PluginObjectParameterTypeTrait => - fromPluginParameters(pt.parameters, scope :+ key) + case sv: SimpleParameterValue => + Seq(new TemplateVariableValue(key, scope, Seq(sv.strValue))) + case nested: ParameterValues => + fromPluginParameters(nested, scope :+ key) + case _ => + Seq.empty } } }.flatten.toSeq diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index 9b4608cba4..46aac4f8cc 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -1,8 +1,8 @@ package org.silkframework.plugins.dataset.rdf.executors import org.silkframework.config.{Prefixes, Task, TaskSpec} -import org.silkframework.dataset.DataSource -import org.silkframework.dataset.rdf.{SparqlEndpoint, SparqlResults} +import org.silkframework.dataset.{DataSource, DatasetSpec} +import org.silkframework.dataset.rdf.{RdfDataset, SparqlEndpoint, 
SparqlResults} import org.silkframework.entity.Entity import org.silkframework.execution.local.{GenericEntityTable, LocalEntities, LocalExecution, LocalExecutor} import org.silkframework.execution.typed.SparqlEndpointEntitySchema @@ -23,12 +23,11 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT context: ActivityContext[ExecutionReport]) (implicit pluginContext: PluginContext): Option[LocalEntities] = { val taskData = task.data - implicit val user: UserContext = pluginContext.user inputs match { case Seq(SparqlEndpointEntitySchema(sparql)) => implicit val executionReportUpdater: SparqlSelectExecutionReportUpdater = SparqlSelectExecutionReportUpdater(task, context) - val entities = executeOnSparqlEndpoint(taskData, sparql.task.data.plugin.sparqlEndpoint, executionReportUpdater = Some(executionReportUpdater)) + val entities = executeOnSparqlEndpoint(taskData, sparql.task, executionReportUpdater = Some(executionReportUpdater)) Some(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task)) case _ => throw TaskException("SPARQL select executor did not receive a SPARQL endpoint as requested!") @@ -36,19 +35,21 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT } def executeOnSparqlEndpoint(sparqlSelectTask: SparqlSelectCustomTask, - sparql: SparqlEndpoint, + inputTask: Task[DatasetSpec[RdfDataset]], limit: Int = Integer.MAX_VALUE, executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]) - (implicit userContext: UserContext): CloseableIterator[Entity] = { + (implicit pluginContext: PluginContext): CloseableIterator[Entity] = { val selectLimit = math.min(sparqlSelectTask.intLimit.getOrElse(Integer.MAX_VALUE), limit) - val results = select(sparqlSelectTask, sparql, selectLimit) + val results = select(sparqlSelectTask, inputTask, selectLimit) val vars: IndexedSeq[String] = getSparqlVars(sparqlSelectTask) createEntities(sparqlSelectTask, results, vars, executionReportUpdater) } - 
private def select(sparqlSelectTask: SparqlSelectCustomTask, sparql: SparqlEndpoint, selectLimit: Int) - (implicit userContext: UserContext): SparqlResults = { - executeSelect(sparql, sparqlSelectTask.queryTemplate.evaluate(sparql), selectLimit, Some(sparqlSelectTask.sparqlTimeout)) + private def select(sparqlSelectTask: SparqlSelectCustomTask, inputTask: Task[_ <: DatasetSpec[RdfDataset]], selectLimit: Int) + (implicit pluginContext: PluginContext): SparqlResults = { + implicit val user: UserContext = pluginContext.user + val sparqlEndpoint = inputTask.data.plugin.sparqlEndpoint + executeSelect(sparqlEndpoint, sparqlSelectTask.queryTemplate.evaluate(inputTask), selectLimit, Some(sparqlSelectTask.sparqlTimeout)) } /** diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala index 7d1fd677d8..9f76cd6c37 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala @@ -1,6 +1,11 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating -import org.silkframework.dataset.rdf.SparqlEndpoint +import org.silkframework.config.Task +import org.silkframework.dataset.DatasetSpec +import org.silkframework.dataset.rdf.RdfDataset +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlSelectTemplate.inputTaskScope +import org.silkframework.runtime.plugin.PluginContext +import org.silkframework.runtime.templating.TemplateVariableConversions._ import org.silkframework.runtime.templating.{CompiledTemplate, TemplateEngines, TemplateVariableValue} import java.io.StringWriter @@ -13,19 +18,21 @@ class SparqlSelectTemplate(template: CompiledTemplate) 
{ /** * Evaluates the template and returns the SPARQL query. */ - def evaluate(endpoint: SparqlEndpoint): String = { - evaluateWithVariables(Seq( - new TemplateVariableValue(name = "graph", values = endpoint.sparqlParams.graph.toSeq) - )) + def evaluate(task: Task[DatasetSpec[RdfDataset]])(implicit pluginContext: PluginContext): String = { + evaluateWithVariables( + fromTask(task, scope = inputTaskScope) + ) } /** * Evaluates the template using default values for the variables and returns the SPARQL query. + * All unassigned variables are filled with an empty string. */ def evaluateWithDefaults(): String = { - evaluateWithVariables(Seq( - new TemplateVariableValue(name = "graph", values = Seq.empty) - )) + val defaultVariables = template.variables.getOrElse(Seq.empty).map { varName => + new TemplateVariableValue(varName.name, varName.scope, Seq("")) + } + evaluateWithVariables(defaultVariables) } private def evaluateWithVariables(variables: Seq[TemplateVariableValue]): String = { @@ -38,6 +45,8 @@ class SparqlSelectTemplate(template: CompiledTemplate) { object SparqlSelectTemplate { + val inputTaskScope: Seq[String] = Seq("input") + /** * Creates a SPARQL template from a string. 
*/ diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala index c3599935ff..d033fd70ca 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala @@ -3,13 +3,14 @@ package org.silkframework.plugins.dataset.rdf.executors import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers -import org.silkframework.config.PlainTask +import org.silkframework.config.{PlainTask, Task} import org.silkframework.dataset.rdf._ +import org.silkframework.dataset.{DataSource, DatasetSpec, EntitySink, LinkSink} import org.silkframework.entity.Entity import org.silkframework.plugins.dataset.rdf.tasks.SparqlSelectCustomTask import org.silkframework.runtime.activity.{TestUserContextTrait, UserContext} import org.silkframework.runtime.iterator.{CloseableIterator, TraversableIterator} -import org.silkframework.runtime.plugin.PluginContext +import org.silkframework.runtime.plugin.{ParameterValues, PluginContext} import org.silkframework.util.{MockitoSugar, TestMocks} import scala.collection.immutable.SortedMap @@ -42,7 +43,7 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec } Entity.empty("") // Make sure that Entity class is loaded val start = System.currentTimeMillis() - val entities = LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, sparqlEndpoint, executionReportUpdater = Some(reportUpdater)) + val entities = LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint), executionReportUpdater = Some(reportUpdater)) val entity = entities.head entity.values.flatten.head mustBe "subject 0" 
(System.currentTimeMillis() - start).toInt must be < quickReactionTime @@ -58,24 +59,41 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec correctTimeout = endpoint.sparqlParams.timeout.contains(timeout) }) val limit = 1000 * 1000 * 1000 - val entities = LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, sparqlEndpoint, limit, Some(reportUpdater)) + val entities = LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint), limit, Some(reportUpdater)) entities.headOption // Needed to actually execute the query correctTimeout mustBe true } - it should "evaluate a Jinja query template using the graph variable from the SPARQL endpoint" in { + it should "evaluate a Jinja query template using the graph variable from the task parameters" in { val graphUri = "http://example.org/testGraph" - val query = "SELECT * WHERE { GRAPH <{{ graph ~ \"/data\" }}> { ?s ?p ?o } }" + val query = "SELECT * WHERE { GRAPH <{{ input.parameters.graph ~ \"/data\" }}> { ?s ?p ?o } }" val task = SparqlSelectCustomTask(query) var capturedQuery = "" val activityContextMock = TestMocks.activityContextMock() val reportUpdater = SparqlSelectExecutionReportUpdater(PlainTask("task", task), activityContextMock) - val sparqlEndpoint = sparqlEndpointStub(graphUri = Some(graphUri), queryCapture = q => capturedQuery = q) - LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, sparqlEndpoint, executionReportUpdater = Some(reportUpdater)).headOption + val sparqlEndpoint = sparqlEndpointStub(queryCapture = q => capturedQuery = q) + LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint, graphUri = Some(graphUri)), executionReportUpdater = Some(reportUpdater)).headOption task.outputSchema.typedPaths.map(_.toUntypedPath.normalizedSerialization) mustBe IndexedSeq("s", "p", "o") capturedQuery must include(s"<$graphUri/data>") - capturedQuery must not include "{{ graph" + capturedQuery must not include "{{ input.parameters.graph" 
+ } + + private def taskWithEndpoint(sparqlEndpoint: SparqlEndpoint, graphUri: Option[String] = None): Task[DatasetSpec[RdfDataset]] = { + PlainTask("testDataset", DatasetSpec(new StubRdfDataset(sparqlEndpoint, graphUri))) + } + + private class StubRdfDataset(endpoint: SparqlEndpoint, graphUri: Option[String] = None) extends RdfDataset { + override def sparqlEndpoint: SparqlEndpoint = endpoint + override def parameters(implicit pluginContext: PluginContext): ParameterValues = { + graphUri match { + case Some(g) => ParameterValues.fromStringMap(Map("graph" -> g)) + case None => ParameterValues.empty + } + } + override def source(implicit userContext: UserContext): DataSource = ??? + override def linkSink(implicit userContext: UserContext): LinkSink = ??? + override def entitySink(implicit userContext: UserContext): EntitySink = ??? } private def sparqlEndpointStub(selectCallback: SparqlEndpoint => Unit = _ => {}, @@ -105,4 +123,4 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec } } } -} +} \ No newline at end of file diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala index 5aa3237145..f654c095b3 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala @@ -91,8 +91,10 @@ class JinjaTemplate(val node: Node) extends CompiledTemplate { for (vars <- variables) { // Collect all scoped variables of the form 'scope.name' val names = values.map(_.asName) - // Variables of the form 'scope.name' can also be addressed as just 'scope' - val scopes = values.filter(_.scope.nonEmpty).map(v => new TemplateVariableName(v.scope.mkString("."), 
Seq.empty)) + // Variables of the form 'scope.name' can also be addressed by any scope prefix (e.g., 'input' or 'input.parameters') + val scopes = values.filter(_.scope.nonEmpty).flatMap { v => + (1 to v.scope.length).map(n => new TemplateVariableName(v.scope.take(n).mkString("."), Seq.empty)) + } // Find missing vars val existingVars = (names ++ scopes).toSet missingVars = vars.filterNot(existingVars.contains) diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala index 6188d13a2f..6fafce09ef 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala @@ -105,9 +105,21 @@ class JinjaVariableCollector { unboundVars = Seq(new TemplateVariableName(varName, Seq.empty)) ) case _ => - Scope( - unboundVars = tree.getIdentifierNodes.asScala.map(_.getName).filterNot(ignoreIdentifierNode).toSeq.map(new TemplateVariableName(_, Seq.empty)) - ) + // Try to find scoped variable references (e.g. `scope.name`) within complex expressions + val scopedVars = JinjaVariableCollector.scopedName.findAllMatchIn(expression).map { m => + val scopePart = m.group(1).dropRight(1) + val name = m.group(2) + new TemplateVariableName(name, scopePart.split('.').toSeq) + }.toSeq + // Collect plain (unscoped) identifiers, excluding roots of scoped vars (e.g. 
`loop` from `loop.index`) + val scopedRoots = scopedVars.flatMap(_.scope.headOption).toSet + val plainVars = tree.getIdentifierNodes.asScala + .map(_.getName) + .filterNot(ignoreIdentifierNode) + .filterNot(scopedRoots) + .toSeq + .map(new TemplateVariableName(_, Seq.empty)) + Scope(unboundVars = (scopedVars ++ plainVars).distinct) } } catch { case _: TreeBuilderException => diff --git a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala index faf0762bbe..f2ce0c231a 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala @@ -92,6 +92,11 @@ class JinjaVariableCollectorTest extends AnyFlatSpec with Matchers { collect("This is {{project.name}} from {{global.city}}.") shouldBe Seq("project.name", "global.city") } + it should "collect scoped variables in complex expressions" in { + collect("{{ input.parameters.graph ~ \"/data\" }}") shouldBe Seq("input.parameters.graph") + collect("{{ a.b ~ c.d }}") shouldBe Seq("a.b", "c.d") + } + it should "collect variables used in method calls" in { collect( """ diff --git a/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala b/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala index c20b0ce9fa..911d8043bd 100644 --- a/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala +++ b/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala @@ -11,7 +11,7 @@ import io.swagger.v3.oas.annotations.parameters.RequestBody import io.swagger.v3.oas.annotations.responses.ApiResponse 
import io.swagger.v3.oas.annotations.tags.Tag import io.swagger.v3.oas.annotations.{Operation, Parameter} -import org.silkframework.config.{Prefixes, TaskSpec} +import org.silkframework.config.{Prefixes, Task, TaskSpec} import org.silkframework.dataset.DatasetSpec.GenericDatasetSpec import org.silkframework.dataset._ import org.silkframework.dataset.rdf.RdfDataset @@ -239,9 +239,9 @@ class PeakTransformApi @Inject() () extends InjectedController with UserContextA } else { val datasetTask = project.task[GenericDatasetSpec](sparqlDataset) datasetTask.data.plugin match { - case rdfDataset: RdfDataset with Dataset => + case _: RdfDataset with Dataset => val executor = LocalSparqlSelectExecutor() - val entities = executor.executeOnSparqlEndpoint(sparqlSelectTask, rdfDataset.sparqlEndpoint, maxTryEntities, executionReportUpdater = None) + val entities = executor.executeOnSparqlEndpoint(sparqlSelectTask, datasetTask.asInstanceOf[Task[_ <: DatasetSpec[RdfDataset]]], maxTryEntities, executionReportUpdater = None) val entityDatasource = EntityDatasource(datasetTask, entities, sparqlSelectTask.outputSchema) try { entityDatasource.peak(ruleSchemata.inputSchema, maxTryEntities).use { exampleEntities => From 58fa582397f216e0d0c361f453e5d30d4f945643 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 30 Mar 2026 17:37:09 +0200 Subject: [PATCH 33/63] Update doc of SparqlSelectCustomTask --- .../dataset/rdf/tasks/SparqlSelectCustomTask.md | 14 ++++++++------ .../dataset/rdf/tasks/SparqlSelectCustomTask.scala | 5 +++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index b816584be4..7f3c1b8c50 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ 
b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -18,22 +18,24 @@ This tabular raw form is transformed into an _entity table_. ### Templating -The select query supports [Jinja](https://jinja.palletsprojects.com/) templating. The following variable is -automatically provided at execution time: +The select query supports [Jinja](https://jinja.palletsprojects.com/) templating. All parameters of the input task +are automatically provided at execution time under the `input.parameters` scope: | Variable | Description | |----------|-------------| -| `graph` | The named graph URI from the input dataset's graph parameter. Empty if no graph is configured. | +| `input.parameters.graph` | The named graph URI from the input dataset's graph parameter. Empty if no graph is configured. | +| `input.parameters.` | Any other parameter of the input dataset, referenced by its parameter name. | -The `graph` variable can be used directly or combined with Jinja expressions. For example, to query a named graph +These variables can be used directly or combined with Jinja expressions. For example, to query a named graph whose URI is derived by appending a suffix to the configured graph URI: ```sparql -SELECT * WHERE { GRAPH <{{ graph ~ "/data" }}> { ?s ?p ?o } } +SELECT * WHERE { GRAPH <{{ input.parameters.graph ~ "/data" }}> { ?s ?p ?o } } ``` The output schema (i.e. the result variables) is derived from the query at configuration time by evaluating the -template with default values (empty `graph`), so the query must remain valid SPARQL regardless of the graph value. +template with default values (empty strings for all parameters), so the query must remain valid SPARQL regardless +of the parameter values. 
### Internal Specifics diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index dde4915ec8..1ee0344847 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -34,8 +34,9 @@ case class SparqlSelectCustomTask( @Param( label = "Select query", value = "A SPARQL 1.1 select query. The query supports Jinja templating. " + - "The 'graph' variable is automatically provided from the input dataset's graph parameter. " + - "Example with graph: SELECT * WHERE { GRAPH <{{ graph ~ \"/data\" }}> { ?s ?p ?o } }", + "All parameters of the input dataset are automatically provided under the 'input.parameters' scope, " + + "e.g., 'input.parameters.graph' for the graph parameter. 
" + + "Example with graph: SELECT * WHERE { GRAPH <{{ input.parameters.graph ~ \"/data\" }}> { ?s ?p ?o } }", example = "select * where { ?s ?p ?o }") selectQuery: SparqlCodeParameter, @Param(label = "Result limit", value = "If set to a positive integer, the number of results is limited") From bc81151e3b5dd9a10c66a12015443bfa5c04b7f1 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Tue, 31 Mar 2026 09:07:03 +0200 Subject: [PATCH 34/63] JinjaVariableCollector: when merging scopes with ++, a variable like TemplateVariableName("label", Seq("nested", "sub")) is now considered bound if its scope root ("nested") is any simple bound variable (scope.isEmpty) --- .../plugins/templating/jinja/JinjaVariableCollector.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala index 6fafce09ef..2aea83093e 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala @@ -161,8 +161,12 @@ class JinjaVariableCollector { */ def ++(scope: Scope): Scope = { val boundVarsSet = boundVars.toSet + val boundSimpleNames = boundVars.filter(_.scope.isEmpty).map(_.name).toSet + def isBound(v: TemplateVariableName): Boolean = { + boundVarsSet.contains(v) || v.scope.headOption.exists(boundSimpleNames.contains) + } Scope( - unboundVars = (unboundVars ++ scope.unboundVars).distinct.filterNot(boundVarsSet), + unboundVars = (unboundVars ++ scope.unboundVars).distinct.filterNot(isBound), boundVars = (boundVars ++ scope.boundVars).distinct ) } From f13ac8caecf72ee9ea9729ce9b834efcbfc664e8 Mon Sep 17 00:00:00 2001 
From: Robert Isele Date: Wed, 8 Apr 2026 10:15:05 +0200 Subject: [PATCH 35/63] Improve SparqlSelectCustomTask to support the same format for input and output properties as the update task. --- .../TemplateVariableConversions.scala | 2 +- .../rdf/tasks/SparqlSelectCustomTask.md | 20 +++-- .../executors/LocalSparqlSelectExecutor.scala | 12 ++- .../executors/LocalSparqlUpdateExecutor.scala | 14 +-- .../rdf/tasks/SparqlSelectCustomTask.scala | 11 ++- .../rdf/tasks/SparqlUpdateCustomTask.scala | 3 +- .../templating/SparqlSelectTemplate.scala | 58 ------------- ...ateTemplate.scala => SparqlTemplate.scala} | 86 ++++++++++++------- ...arqlUpdateTemplatingEngineSimpleTest.scala | 2 +- .../LocalSparqlSelectExecutorTest.scala | 6 +- ...st.scala => SparqlTemplateJinjaTest.scala} | 6 +- ...scala => SparqlTemplateVelocityTest.scala} | 8 +- 12 files changed, 97 insertions(+), 131 deletions(-) delete mode 100644 silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala rename silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/{SparqlUpdateTemplate.scala => SparqlTemplate.scala} (71%) rename silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/{SparqlUpdateTemplateJinjaTest.scala => SparqlTemplateJinjaTest.scala} (94%) rename silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/{SparqlUpdateTemplateVelocityTest.scala => SparqlTemplateVelocityTest.scala} (92%) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala index a74eddf02a..6ac9eb9d8e 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala +++ 
b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala @@ -1,6 +1,6 @@ package org.silkframework.runtime.templating -import org.silkframework.config.{Task, TaskSpec} +import org.silkframework.config.{Prefixes, Task, TaskSpec} import org.silkframework.entity.Entity import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, SimpleParameterValue} diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index 7f3c1b8c50..0fb826dc66 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -18,19 +18,21 @@ This tabular raw form is transformed into an _entity table_. ### Templating -The select query supports [Jinja](https://jinja.palletsprojects.com/) templating. All parameters of the input task -are automatically provided at execution time under the `input.parameters` scope: +The select query supports [Jinja](https://jinja.palletsprojects.com/) templating. Properties of the input and output +tasks can be accessed via the `inputProperties` and `outputProperties` objects. Both objects expose the same methods +as the `row` object in the SPARQL Update operator: -| Variable | Description | -|----------|-------------| -| `input.parameters.graph` | The named graph URI from the input dataset's graph parameter. Empty if no graph is configured. | -| `input.parameters.` | Any other parameter of the input dataset, referenced by its parameter name. | +- `inputProperties.uri(param)`: Renders a parameter of the input task as **URI**. +- `inputProperties.plainLiteral(param)`: Renders a parameter of the input task as **plain literal**. 
+- `inputProperties.rawUnsafe(param)`: Renders a parameter of the input task as is, i.e. **no escaping** is done. +- `inputProperties.exists(param)`: Returns `true` if the parameter **exists** in the input task, else `false`. -These variables can be used directly or combined with Jinja expressions. For example, to query a named graph -whose URI is derived by appending a suffix to the configured graph URI: +The same methods are available on `outputProperties` for the output task. + +For example, to query the named graph that is configured on the input dataset: ```sparql -SELECT * WHERE { GRAPH <{{ input.parameters.graph ~ "/data" }}> { ?s ?p ?o } } +SELECT * WHERE { GRAPH {{ inputProperties.uri("graph") }} { ?s ?p ?o } } ``` The output schema (i.e. the result variables) is derived from the query at configuration time by evaluating the diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index 46aac4f8cc..3b40774345 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -8,6 +8,7 @@ import org.silkframework.execution.local.{GenericEntityTable, LocalEntities, Loc import org.silkframework.execution.typed.SparqlEndpointEntitySchema import org.silkframework.execution.{ExecutionReport, ExecutionReportUpdater, ExecutorOutput, TaskException} import org.silkframework.plugins.dataset.rdf.tasks.SparqlSelectCustomTask +import org.silkframework.plugins.dataset.rdf.tasks.templating.TaskProperties import org.silkframework.runtime.activity.{ActivityContext, UserContext} import org.silkframework.runtime.iterator.CloseableIterator import 
org.silkframework.runtime.plugin.PluginContext @@ -36,20 +37,25 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT def executeOnSparqlEndpoint(sparqlSelectTask: SparqlSelectCustomTask, inputTask: Task[DatasetSpec[RdfDataset]], + outputTask: Task[_ <: TaskSpec], limit: Int = Integer.MAX_VALUE, executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]) (implicit pluginContext: PluginContext): CloseableIterator[Entity] = { val selectLimit = math.min(sparqlSelectTask.intLimit.getOrElse(Integer.MAX_VALUE), limit) - val results = select(sparqlSelectTask, inputTask, selectLimit) + val results = select(sparqlSelectTask, inputTask, outputTask, selectLimit) val vars: IndexedSeq[String] = getSparqlVars(sparqlSelectTask) createEntities(sparqlSelectTask, results, vars, executionReportUpdater) } - private def select(sparqlSelectTask: SparqlSelectCustomTask, inputTask: Task[_ <: DatasetSpec[RdfDataset]], selectLimit: Int) + private def select(sparqlSelectTask: SparqlSelectCustomTask, + inputTask: Task[_ <: DatasetSpec[RdfDataset]], + outputTask: Task[_ <: TaskSpec], + selectLimit: Int) (implicit pluginContext: PluginContext): SparqlResults = { implicit val user: UserContext = pluginContext.user val sparqlEndpoint = inputTask.data.plugin.sparqlEndpoint - executeSelect(sparqlEndpoint, sparqlSelectTask.queryTemplate.evaluate(inputTask), selectLimit, Some(sparqlSelectTask.sparqlTimeout)) + val taskProperties = TaskProperties.create(Some(inputTask), Some(outputTask), pluginContext) + executeSelect(sparqlEndpoint, sparqlSelectTask.queryTemplate.generate(Map.empty, taskProperties), selectLimit, Some(sparqlSelectTask.sparqlTimeout)) } /** diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala index 0f3044a49a..024a4dd6ae 100644 --- 
a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala @@ -31,7 +31,7 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT // Generate SPARQL Update queries for input entities def executeOnInput[U](batchEmitter: BatchSparqlUpdateEmitter[U], expectedProperties: IndexedSeq[String], input: LocalEntities): Unit = { val inputProperties = getInputProperties(input.entitySchema).distinct - val taskProperties = createTaskProperties(Some(input.task), output.task, pluginContext) + val taskProperties = TaskProperties.create(Some(input.task), output.task, pluginContext) checkInputSchema(expectedProperties, inputProperties.toSet) for (entity <- input.entities; values = expectedSchema.typedPaths.map(tp => entity.valueOfPath(tp.toUntypedPath)) if values.forall(_.nonEmpty)) { @@ -74,21 +74,11 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT inputTask: Option[Task[_ <: TaskSpec]] = None, outputTask: Option[Task[_ <: TaskSpec]] = None) (implicit pluginContext: PluginContext): Unit = { - val taskProperties = createTaskProperties(inputTask = inputTask, outputTask = outputTask, pluginContext = pluginContext) + val taskProperties = TaskProperties.create(inputTask = inputTask, outputTask = outputTask, pluginContext = pluginContext) val query = updateTask.compiledTemplate.generate(Map.empty, taskProperties) batchEmitter.update(query) } - private def createTaskProperties(inputTask: Option[Task[_ <: TaskSpec]], - outputTask: Option[Task[_ <: TaskSpec]], - pluginContext: PluginContext): TaskProperties = { - // It's obligatory to have empty prefixes here, since we do not want to have prefixed URIs for URI parameters - implicit val updatedPluginContext: PluginContext = PluginContext.updatedPluginContext(pluginContext, prefixes = 
Some(Prefixes.empty)) - val inputProperties = inputTask.toSeq.flatMap(_.parameters.toStringMap).toMap - val outputProperties = outputTask.toSeq.flatMap(_.parameters.toStringMap).toMap - TaskProperties(inputProperties, outputProperties) - } - // Check that expected schema is subset of input schema private def checkInputSchema(expectedProperties: Seq[String], inputProperties: Set[String]): Unit = { if (expectedProperties.exists(p => !inputProperties.contains(p))) { diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index 1ee0344847..2cd2113402 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -6,7 +6,7 @@ import org.silkframework.dataset.rdf.SparqlEndpointDatasetParameter import org.silkframework.entity._ import org.silkframework.entity.paths.{TypedPath, UntypedPath} import org.silkframework.execution.typed.SparqlEndpointEntitySchema -import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlSelectTemplate +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlTemplate import org.silkframework.runtime.plugin.annotations.{Param, Plugin} import org.silkframework.runtime.plugin.types.SparqlCodeParameter import org.silkframework.runtime.templating.TemplateEngineAutocompletionProvider @@ -34,9 +34,8 @@ case class SparqlSelectCustomTask( @Param( label = "Select query", value = "A SPARQL 1.1 select query. The query supports Jinja templating. " + - "All parameters of the input dataset are automatically provided under the 'input.parameters' scope, " + - "e.g., 'input.parameters.graph' for the graph parameter. 
" + - "Example with graph: SELECT * WHERE { GRAPH <{{ input.parameters.graph ~ \"/data\" }}> { ?s ?p ?o } }", + "Properties of the input and output tasks can be accessed via the 'inputProperties' and 'outputProperties' objects. " + + "Example: SELECT * WHERE { GRAPH {{ inputProperties.uri(\"graph\") }} { ?s ?p ?o } }", example = "select * where { ?s ?p ?o }") selectQuery: SparqlCodeParameter, @Param(label = "Result limit", value = "If set to a positive integer, the number of results is limited") @@ -65,7 +64,7 @@ case class SparqlSelectCustomTask( Try(limit.toInt).filter(_ > 0).toOption } - val queryTemplate: SparqlSelectTemplate = SparqlSelectTemplate.create(templatingMode, selectQuery.str) + val queryTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, selectQuery.str) override def inputPorts: InputPorts = { FixedNumberOfInputs(Seq(FixedSchemaPort(SparqlEndpointEntitySchema.schema))) @@ -76,7 +75,7 @@ case class SparqlSelectCustomTask( } val outputSchema: EntitySchema = { - val query = QueryFactory.create(queryTemplate.evaluateWithDefaults()) + val query = QueryFactory.create(queryTemplate.generateWithDefaults()) if (!query.isSelectType) { throw new ValidationException("Query is not a SELECT query!") } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index f785520fa1..248c6c747c 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -37,7 +37,8 @@ case class SparqlUpdateCustomTask( ) extends CustomTask { assert(batchSize >= 1, "Batch size must be greater zero!") - val compiledTemplate: SparqlUpdateTemplate = SparqlUpdateTemplate.create(templatingMode, 
sparqlUpdateTemplate.str, batchSize) + val compiledTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, sparqlUpdateTemplate.str) + compiledTemplate.validateUpdateQuery(batchSize) def isStaticTemplate: Boolean = compiledTemplate.isStaticTemplate diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala deleted file mode 100644 index 9f76cd6c37..0000000000 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectTemplate.scala +++ /dev/null @@ -1,58 +0,0 @@ -package org.silkframework.plugins.dataset.rdf.tasks.templating - -import org.silkframework.config.Task -import org.silkframework.dataset.DatasetSpec -import org.silkframework.dataset.rdf.RdfDataset -import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlSelectTemplate.inputTaskScope -import org.silkframework.runtime.plugin.PluginContext -import org.silkframework.runtime.templating.TemplateVariableConversions._ -import org.silkframework.runtime.templating.{CompiledTemplate, TemplateEngines, TemplateVariableValue} - -import java.io.StringWriter - -/** - * Wraps a [[CompiledTemplate]] and adds SPARQL SELECT specific capabilities. - */ -class SparqlSelectTemplate(template: CompiledTemplate) { - - /** - * Evaluates the template and returns the SPARQL query. - */ - def evaluate(task: Task[DatasetSpec[RdfDataset]])(implicit pluginContext: PluginContext): String = { - evaluateWithVariables( - fromTask(task, scope = inputTaskScope) - ) - } - - /** - * Evaluates the template using default values for the variables and returns the SPARQL query. - * All unassigned variables are filled with an empty string. 
- */ - def evaluateWithDefaults(): String = { - val defaultVariables = template.variables.getOrElse(Seq.empty).map { varName => - new TemplateVariableValue(varName.name, varName.scope, Seq("")) - } - evaluateWithVariables(defaultVariables) - } - - private def evaluateWithVariables(variables: Seq[TemplateVariableValue]): String = { - val writer = new StringWriter - template.evaluate(variables, writer) - writer.toString - } - -} - -object SparqlSelectTemplate { - - val inputTaskScope: Seq[String] = Seq("input") - - /** - * Creates a SPARQL template from a string. - */ - def create(templateEngineId: String, template: String): SparqlSelectTemplate = { - val templateEngine = TemplateEngines.create(templateEngineId) - val sparqlTemplate = new SparqlSelectTemplate(templateEngine.compile(template)) - sparqlTemplate - } -} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala similarity index 71% rename from silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplate.scala rename to silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index 999eaa4f8f..0a466d4143 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -1,10 +1,12 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating import org.apache.jena.update.UpdateFactory +import org.silkframework.config.{Prefixes, Task, TaskSpec} import org.silkframework.entity.EntitySchema import org.silkframework.entity.paths.UntypedPath import 
org.silkframework.execution.local.EmptyEntityTable -import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlUpdateTemplate.{InputProperties, OutputProperties, Row} +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlTemplate.{INPUT_PROPERTIES_VAR_NAME, InputProperties, OUTPUT_PROPERTIES_VAR_NAME, OutputProperties, ROW_VAR_NAME, Row} +import org.silkframework.runtime.plugin.PluginContext import org.silkframework.runtime.templating.{CompiledTemplate, TemplateEngines, TemplateMethodUsage, TemplateVariableName} import org.silkframework.runtime.validation.ValidationException @@ -14,7 +16,7 @@ import scala.util.{Failure, Success, Try} /** * Wraps a [[CompiledTemplate]] and adds SPARQL Update specific capabilities. */ -class SparqlUpdateTemplate(template: CompiledTemplate) { +class SparqlTemplate(template: CompiledTemplate) { /** * Renders the template based on the variable assignments. @@ -26,33 +28,40 @@ class SparqlUpdateTemplate(template: CompiledTemplate) { val values = scala.collection.mutable.LinkedHashMap[String, AnyRef]() // Flat entity values (used by simple template engine) placeholderAssignments.foreach { case (k, v) => values(k) = v } - // SPARQL context objects (used by Velocity engine) - values(SparqlUpdateTemplate.ROW_VAR_NAME) = Row(placeholderAssignments) - values(SparqlUpdateTemplate.INPUT_PROPERTIES_VAR_NAME) = InputProperties(taskProperties.inputTask) - values(SparqlUpdateTemplate.OUTPUT_PROPERTIES_VAR_NAME) = OutputProperties(taskProperties.outputTask) + // SPARQL context objects + values(ROW_VAR_NAME) = Row(placeholderAssignments) + values(INPUT_PROPERTIES_VAR_NAME) = InputProperties(taskProperties.inputTask) + values(OUTPUT_PROPERTIES_VAR_NAME) = OutputProperties(taskProperties.outputTask) val writer = new StringWriter() template.evaluate(values.toMap, writer) writer.toString } + /** + * Renders the template by assigning default values for all variables. 
+ */ + def generateWithDefaults(): String = { + val genericUri = "urn:generic:1" + val entityVariables = entityVariableNames + val assignments = entityVariables.map(_ -> genericUri).toMap + val inputPropVars = taskPropertyVariableNames(Seq("inputProperties")).map(_ -> genericUri).toMap + val outputPropVars = taskPropertyVariableNames(Seq("outputProperties")).map(_ -> genericUri).toMap + val taskProps = TaskProperties(inputPropVars, outputPropVars) + Try(generate(assignments, taskProps)) match { + case Failure(exception) => + throw new ValidationException( + "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) + case Success(value) => value + } + } + /** Validates the template, including batch validation if batchSize > 1. */ - def validate(batchSize: Int): Unit = { + def validateUpdateQuery(batchSize: Int): Unit = { if (usesRawUnsafe) { // We cannot generate meaningful example values for the template if $row.rawUnsafe() is used, because it could generate arbitrary SPARQL syntax. } else { // Generate example input assignments - val genericUri = "urn:generic:1" - val entityVariables = entityVariableNames - val assignments = entityVariables.map(_ -> genericUri).toMap - val inputPropVars = taskPropertyVariableNames(Seq("inputProperties")).map(_ -> genericUri).toMap - val outputPropVars = taskPropertyVariableNames(Seq("outputProperties")).map(_ -> genericUri).toMap - val taskProps = TaskProperties(inputPropVars, outputPropVars) - val sparqlQuery = Try(generate(assignments, taskProps)) match { - case Failure(exception) => - throw new ValidationException( - "The SPARQL Update template could not be rendered with example values. 
Error message: " + exception.getMessage, exception) - case Success(value) => value - } + val sparqlQuery = generateWithDefaults() Try(UpdateFactory.create(sparqlQuery)).failed.toOption.foreach { parseError => throw new ValidationException( "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + @@ -92,13 +101,13 @@ class SparqlUpdateTemplate(template: CompiledTemplate) { /** Returns SPARQL-specific variables, extracting paths from method usages. */ private lazy val sparqlVariables: Option[Seq[TemplateVariableName]] = { - val usages = SparqlUpdateTemplate.templatingVariables.flatMap(v => template.methodUsages(v)) + val usages = SparqlTemplate.templatingVariables.flatMap(v => template.methodUsages(v)) if (usages.nonEmpty) { - val rowVars = sparqlMethodUsages(SparqlUpdateTemplate.ROW_VAR_NAME) + val rowVars = sparqlMethodUsages(SparqlTemplate.ROW_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue)) - val inputPropVars = sparqlMethodUsages(SparqlUpdateTemplate.INPUT_PROPERTIES_VAR_NAME) + val inputPropVars = sparqlMethodUsages(INPUT_PROPERTIES_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue, Seq("inputProperties"))) - val outputPropVars = sparqlMethodUsages(SparqlUpdateTemplate.OUTPUT_PROPERTIES_VAR_NAME) + val outputPropVars = sparqlMethodUsages(OUTPUT_PROPERTIES_VAR_NAME) .map(u => new TemplateVariableName(u.parameterValue, Seq("outputProperties"))) Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) } else { @@ -113,7 +122,7 @@ class SparqlUpdateTemplate(template: CompiledTemplate) { /** Checks if any SPARQL templating variable uses the rawUnsafe method. 
*/ private lazy val usesRawUnsafe: Boolean = { - SparqlUpdateTemplate.templatingVariables.exists(varName => + SparqlTemplate.templatingVariables.exists(varName => sparqlMethodUsages(varName).exists(_.methodName == "rawUnsafe")) } @@ -138,7 +147,7 @@ class SparqlUpdateTemplate(template: CompiledTemplate) { } } -object SparqlUpdateTemplate { +object SparqlTemplate { private final val ROW_VAR_NAME = "row" private final val INPUT_PROPERTIES_VAR_NAME = "inputProperties" @@ -149,11 +158,9 @@ object SparqlUpdateTemplate { /** * Creates a SPARQL template from a string. */ - def create(templateEngineId: String, template: String, batchSize: Int): SparqlUpdateTemplate = { + def create(templateEngineId: String, template: String): SparqlTemplate = { val templateEngine = TemplateEngines.create(templateEngineId) - val sparqlTemplate = new SparqlUpdateTemplate(templateEngine.compile(template)) - sparqlTemplate.validate(batchSize) - sparqlTemplate + new SparqlTemplate(templateEngine.compile(template)) } /** Row API used in SPARQL templates. Represents a single row where input paths are either exactly one value or empty. @@ -186,8 +193,27 @@ object SparqlUpdateTemplate { case class OutputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { override val templateVarName: String = OUTPUT_PROPERTIES_VAR_NAME } - } /** Makes properties of the input and output task of a SPARQL Update operator execution available. 
*/ case class TaskProperties(inputTask: Map[String, String], outputTask: Map[String, String]) + +object TaskProperties { + + def create(inputTask: Option[Task[_ <: TaskSpec]], + outputTask: Option[Task[_ <: TaskSpec]], + pluginContext: PluginContext): TaskProperties = { + // It's obligatory to have empty prefixes here, since we do not want to have prefixed URIs for URI parameters + implicit val updatedPluginContext: PluginContext = PluginContext.updatedPluginContext(pluginContext, prefixes = Some(Prefixes.empty)) + val inputProperties = createTaskProperties(inputTask) + val outputProperties = createTaskProperties(outputTask) + TaskProperties(inputProperties, outputProperties) + } + + private def createTaskProperties(task: Option[Task[_ <: TaskSpec]]) + (implicit pluginContext: PluginContext): Map[String, String] = { + task.toSeq.flatMap(_.parameters.toStringMap).toMap + } + + +} \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala index 8f289b9f5f..07edd5cc97 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala @@ -83,7 +83,7 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { def parse(sparqlUpdateTemplate: String, batchSize: Int = 2): Seq[SparqlUpdateTemplatePart] = { val compiled = SparqlSimpleTemplateEngine().compile(sparqlUpdateTemplate) - new SparqlUpdateTemplate(compiled).validate(batchSize) + new SparqlTemplate(compiled).validateUpdateQuery(batchSize) compiled.sparqlUpdateTemplateParts } } diff --git 
a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala index d033fd70ca..67e8919872 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala @@ -66,7 +66,7 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec it should "evaluate a Jinja query template using the graph variable from the task parameters" in { val graphUri = "http://example.org/testGraph" - val query = "SELECT * WHERE { GRAPH <{{ input.parameters.graph ~ \"/data\" }}> { ?s ?p ?o } }" + val query = """SELECT * WHERE { GRAPH {{ inputProperties.uri("graph") }} { ?s ?p ?o } }""" val task = SparqlSelectCustomTask(query) var capturedQuery = "" val activityContextMock = TestMocks.activityContextMock() @@ -75,8 +75,8 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint, graphUri = Some(graphUri)), executionReportUpdater = Some(reportUpdater)).headOption task.outputSchema.typedPaths.map(_.toUntypedPath.normalizedSerialization) mustBe IndexedSeq("s", "p", "o") - capturedQuery must include(s"<$graphUri/data>") - capturedQuery must not include "{{ input.parameters.graph" + capturedQuery must include(s"<$graphUri>") + capturedQuery must not include "inputProperties.uri" } private def taskWithEndpoint(sparqlEndpoint: SparqlEndpoint, graphUri: Option[String] = None): Task[DatasetSpec[RdfDataset]] = { diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateJinjaTest.scala 
b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala similarity index 94% rename from silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateJinjaTest.scala rename to silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala index 3ffaf533c3..b10954b6e0 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateJinjaTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala @@ -7,7 +7,7 @@ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException -class SparqlUpdateTemplateJinjaTest extends AnyFlatSpec with Matchers { +class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { behavior of "SPARQL templating with the Jinja Template Engine" @@ -115,10 +115,10 @@ class SparqlUpdateTemplateJinjaTest extends AnyFlatSpec with Matchers { } def generate(template: String, assignments: Map[String, String], taskProps: TaskProperties = TaskProperties(Map.empty, Map.empty)): String = { - new SparqlUpdateTemplate(JinjaTemplateEngine().compile(template)).generate(assignments, taskProps) + new SparqlTemplate(JinjaTemplateEngine().compile(template)).generate(assignments, taskProps) } def validate(template: String, batchSize: Int = 2): Unit = { - new SparqlUpdateTemplate(JinjaTemplateEngine().compile(template)).validate(batchSize) + new SparqlTemplate(JinjaTemplateEngine().compile(template)).validateUpdateQuery(batchSize) } } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateVelocityTest.scala 
b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala similarity index 92% rename from silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateVelocityTest.scala rename to silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala index 284298ea86..4744c4cf2f 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplateVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala @@ -7,7 +7,7 @@ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException -class SparqlUpdateTemplateVelocityTest extends AnyFlatSpec with Matchers { +class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { behavior of "SPARQL templating with the Velocity Template Engine" @@ -79,7 +79,7 @@ class SparqlUpdateTemplateVelocityTest extends AnyFlatSpec with Matchers { """SELECT * WHERE { | $row.uri("uriProp") rdfs:label $row.plainLiteral("stringProp") |}""".stripMargin - val template = new SparqlUpdateTemplate(VelocityTemplateEngine().compile(stringTemplate)) + val template = new SparqlTemplate(VelocityTemplateEngine().compile(stringTemplate)) for(i <- 1 to 10) { val rendered = template.generate(Map("uriProp" -> s"http://entity$i", "stringProp" -> s"some label $i"), TaskProperties(Map.empty, Map.empty)) rendered mustBe @@ -113,10 +113,10 @@ class SparqlUpdateTemplateVelocityTest extends AnyFlatSpec with Matchers { } private def generate(templateString: String, bindings: Map[String, String]): String = { - new 
SparqlUpdateTemplate(VelocityTemplateEngine().compile(templateString)).generate(bindings, TaskProperties(Map.empty, Map.empty)) + new SparqlTemplate(VelocityTemplateEngine().compile(templateString)).generate(bindings, TaskProperties(Map.empty, Map.empty)) } def validate(template: String, batchSize: Int = 2): Unit = { - new SparqlUpdateTemplate(VelocityTemplateEngine().compile(template)).validate(batchSize) + new SparqlTemplate(VelocityTemplateEngine().compile(template)).validateUpdateQuery(batchSize) } } From 90cd9118d582353e33e66d057bfb92d19ac67b02 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 8 Apr 2026 10:29:01 +0200 Subject: [PATCH 36/63] Improve SparqlSelectCustomTask to support the same format for input and output properties as the update task. --- .../dataset/rdf/executors/LocalSparqlSelectExecutor.scala | 8 ++++---- .../rdf/executors/LocalSparqlSelectExecutorTest.scala | 6 +++--- .../app/controllers/transform/PeakTransformApi.scala | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index 3b40774345..b565185160 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -28,7 +28,7 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT inputs match { case Seq(SparqlEndpointEntitySchema(sparql)) => implicit val executionReportUpdater: SparqlSelectExecutionReportUpdater = SparqlSelectExecutionReportUpdater(task, context) - val entities = executeOnSparqlEndpoint(taskData, sparql.task, executionReportUpdater = Some(executionReportUpdater)) + val 
entities = executeOnSparqlEndpoint(taskData, sparql.task, output.task, executionReportUpdater = Some(executionReportUpdater)) Some(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task)) case _ => throw TaskException("SPARQL select executor did not receive a SPARQL endpoint as requested!") @@ -37,7 +37,7 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT def executeOnSparqlEndpoint(sparqlSelectTask: SparqlSelectCustomTask, inputTask: Task[DatasetSpec[RdfDataset]], - outputTask: Task[_ <: TaskSpec], + outputTask: Option[Task[_ <: TaskSpec]], limit: Int = Integer.MAX_VALUE, executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]) (implicit pluginContext: PluginContext): CloseableIterator[Entity] = { @@ -49,12 +49,12 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT private def select(sparqlSelectTask: SparqlSelectCustomTask, inputTask: Task[_ <: DatasetSpec[RdfDataset]], - outputTask: Task[_ <: TaskSpec], + outputTask: Option[Task[_ <: TaskSpec]], selectLimit: Int) (implicit pluginContext: PluginContext): SparqlResults = { implicit val user: UserContext = pluginContext.user val sparqlEndpoint = inputTask.data.plugin.sparqlEndpoint - val taskProperties = TaskProperties.create(Some(inputTask), Some(outputTask), pluginContext) + val taskProperties = TaskProperties.create(Some(inputTask), outputTask, pluginContext) executeSelect(sparqlEndpoint, sparqlSelectTask.queryTemplate.generate(Map.empty, taskProperties), selectLimit, Some(sparqlSelectTask.sparqlTimeout)) } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala index 67e8919872..733fffd762 100644 --- 
a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala @@ -43,7 +43,7 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec } Entity.empty("") // Make sure that Entity class is loaded val start = System.currentTimeMillis() - val entities = LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint), executionReportUpdater = Some(reportUpdater)) + val entities = LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint), None, executionReportUpdater = Some(reportUpdater)) val entity = entities.head entity.values.flatten.head mustBe "subject 0" (System.currentTimeMillis() - start).toInt must be < quickReactionTime @@ -59,7 +59,7 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec correctTimeout = endpoint.sparqlParams.timeout.contains(timeout) }) val limit = 1000 * 1000 * 1000 - val entities = LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint), limit, Some(reportUpdater)) + val entities = LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint), None, limit, Some(reportUpdater)) entities.headOption // Needed to actually execute the query correctTimeout mustBe true } @@ -72,7 +72,7 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec val activityContextMock = TestMocks.activityContextMock() val reportUpdater = SparqlSelectExecutionReportUpdater(PlainTask("task", task), activityContextMock) val sparqlEndpoint = sparqlEndpointStub(queryCapture = q => capturedQuery = q) - LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint, graphUri = Some(graphUri)), executionReportUpdater = Some(reportUpdater)).headOption + LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, 
taskWithEndpoint(sparqlEndpoint, graphUri = Some(graphUri)), None, executionReportUpdater = Some(reportUpdater)).headOption task.outputSchema.typedPaths.map(_.toUntypedPath.normalizedSerialization) mustBe IndexedSeq("s", "p", "o") capturedQuery must include(s"<$graphUri>") diff --git a/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala b/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala index 911d8043bd..cc1d8cabd4 100644 --- a/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala +++ b/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala @@ -241,7 +241,7 @@ class PeakTransformApi @Inject() () extends InjectedController with UserContextA datasetTask.data.plugin match { case _: RdfDataset with Dataset => val executor = LocalSparqlSelectExecutor() - val entities = executor.executeOnSparqlEndpoint(sparqlSelectTask, datasetTask.asInstanceOf[Task[_ <: DatasetSpec[RdfDataset]]], maxTryEntities, executionReportUpdater = None) + val entities = executor.executeOnSparqlEndpoint(sparqlSelectTask, datasetTask.asInstanceOf[Task[_ <: DatasetSpec[RdfDataset]]], None, maxTryEntities, executionReportUpdater = None) val entityDatasource = EntityDatasource(datasetTask, entities, sparqlSelectTask.outputSchema) try { entityDatasource.peak(ruleSchemata.inputSchema, maxTryEntities).use { exampleEntities => From 25da5f06a3ed549b67459086c620cfcb4d049c3c Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 22 Apr 2026 15:24:54 +0200 Subject: [PATCH 37/63] Update SPARQL template variables for Jinja --- .../rdf/tasks/SparqlSelectCustomTask.md | 25 +- .../rdf/tasks/SparqlUpdateCustomTask.md | 24 +- .../executors/LocalSparqlSelectExecutor.scala | 3 +- .../executors/LocalSparqlUpdateExecutor.scala | 7 +- .../rdf/tasks/SparqlSelectCustomTask.scala | 5 +- .../rdf/tasks/SparqlUpdateCustomTask.scala | 2 +- .../templating/SparqlJinjaTemplate.scala | 95 ++++++++ 
.../templating/SparqlLegacyTemplate.scala | 180 +++++++++++++++ .../rdf/tasks/templating/SparqlTemplate.scala | 216 ++++-------------- .../dataset/rdf/sparqlUpdateProject.zip | Bin 11241 -> 11221 bytes ...arqlUpdateTemplatingEngineSimpleTest.scala | 2 +- .../LocalSparqlSelectExecutorTest.scala | 4 +- .../templating/SparqlTemplateJinjaTest.scala | 142 +++++------- .../SparqlTemplateVelocityTest.scala | 6 +- .../workspaceApi/ProjectTaskApiTest.scala | 2 +- 15 files changed, 411 insertions(+), 302 deletions(-) create mode 100644 silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala create mode 100644 silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index 0fb826dc66..093ccab317 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -18,26 +18,27 @@ This tabular raw form is transformed into an _entity table_. ### Templating -The select query supports [Jinja](https://jinja.palletsprojects.com/) templating. Properties of the input and output -tasks can be accessed via the `inputProperties` and `outputProperties` objects. Both objects expose the same methods -as the `row` object in the SPARQL Update operator: +The select query supports [Jinja](https://jinja.palletsprojects.com/) templating. The following variables are available: -- `inputProperties.uri(param)`: Renders a parameter of the input task as **URI**. 
-- `inputProperties.plainLiteral(param)`: Renders a parameter of the input task as **plain literal**. -- `inputProperties.rawUnsafe(param)`: Renders a parameter of the input task as is, i.e. **no escaping** is done. -- `inputProperties.exists(param)`: Returns `true` if the parameter **exists** in the input task, else `false`. - -The same methods are available on `outputProperties` for the output task. +- `input.config.`: a parameter of the connected input task. +- `output.config.`: a parameter of the connected output task. +- `project.`: a project-scoped template variable. +- `global.`: a global template variable. For example, to query the named graph that is configured on the input dataset: ```sparql -SELECT * WHERE { GRAPH {{ inputProperties.uri("graph") }} { ?s ?p ?o } } +SELECT * WHERE { GRAPH <{{ input.config.graph }}> { ?s ?p ?o } } ``` +Parameter and variable names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); bracket-subscript access +is not supported. + +Rendering helpers such as `| uri` or `| plainLiteral` filters are not yet implemented; values are inserted +verbatim and any quoting / URI brackets must be written explicitly in the template. + The output schema (i.e. the result variables) is derived from the query at configuration time by evaluating the -template with default values (empty strings for all parameters), so the query must remain valid SPARQL regardless -of the parameter values. +template with default values, so the query must remain valid SPARQL regardless of the parameter values. 
### Internal Specifics diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md index b51f12a43b..079c86e8ee 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md @@ -22,23 +22,25 @@ The `Simple` and `Velocity Engine` modes are deprecated. `{% %}` for control flow statements such as conditionals. ``` -DELETE DATA { {{ row.uri("PROP_FROM_ENTITY_SCHEMA1") }} rdf:label {{ row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") }} } ; -{% if row.exists("PROP_FROM_ENTITY_SCHEMA1") %} - INSERT DATA { {{ row.uri("PROP_FROM_ENTITY_SCHEMA1") }} rdf:label {{ row.plainLiteral("PROP_FROM_ENTITY_SCHEMA3") }} } ; +DELETE DATA { <{{ input.entity.subject }}> rdfs:label "{{ input.entity.oldLabel }}" } ; +{% if input.entity.subject %} + INSERT DATA { <{{ input.entity.subject }}> rdfs:label "{{ input.entity.newLabel }}" } ; {% endif %} ``` -Input values are accessible via methods on the `row` variable: +The following variables are available: -- `row.uri(inputPath)`: Renders an input value as **URI**. Throws an exception if the value isn't a valid URI. -- `row.plainLiteral(inputPath)`: Renders an input value as **plain literal**, i.e. it escapes problematic characters. -- `row.rawUnsafe(inputPath)`: Renders an input value as is, i.e. **no escaping** is done. This should **only** be used if the input values can be trusted. -- `row.exists(inputPath)`: Returns `true` if a value for the input path **exists**, else `false`. +- `input.entity.`: the value of the given property on the current input entity. +- `input.config.`: a parameter of the connected input task. 
+- `output.config.`: a parameter of the connected output task. +- `project.`: a project-scoped template variable. +- `global.`: a global template variable. -The methods `uri`, `plainLiteral` and `rawUnsafe` throw an exception if no input value is available for the given input path. +Entity property names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); bracket-subscript access such as +`input.entity["urn:prop:label"]` is not supported. -In addition to input values, properties of the input and output tasks can be accessed via the `inputProperties` and -`outputProperties` objects in the same way as the `row` object. For example with `{{ inputProperties.uri("graph") }}`. +Rendering helpers such as `| uri` or `| plainLiteral` filters are not yet implemented; values are inserted +verbatim and any quoting / URI brackets must be written explicitly in the template. ### Example of the `Simple` mode (deprecated) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index b565185160..69ebe16902 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -55,7 +55,8 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT implicit val user: UserContext = pluginContext.user val sparqlEndpoint = inputTask.data.plugin.sparqlEndpoint val taskProperties = TaskProperties.create(Some(inputTask), outputTask, pluginContext) - executeSelect(sparqlEndpoint, sparqlSelectTask.queryTemplate.generate(Map.empty, taskProperties), selectLimit, Some(sparqlSelectTask.sparqlTimeout)) + val templateVariables = 
pluginContext.templateVariables.all.variables + executeSelect(sparqlEndpoint, sparqlSelectTask.queryTemplate.generate(Map.empty, taskProperties, templateVariables), selectLimit, Some(sparqlSelectTask.sparqlTimeout)) } /** diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala index 024a4dd6ae..479c008621 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala @@ -28,6 +28,8 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT val updateTask = task.data val expectedSchema = updateTask.expectedInputSchema + val templateVariables = pluginContext.templateVariables.all.variables + // Generate SPARQL Update queries for input entities def executeOnInput[U](batchEmitter: BatchSparqlUpdateEmitter[U], expectedProperties: IndexedSeq[String], input: LocalEntities): Unit = { val inputProperties = getInputProperties(input.entitySchema).distinct @@ -37,7 +39,7 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT values = expectedSchema.typedPaths.map(tp => entity.valueOfPath(tp.toUntypedPath)) if values.forall(_.nonEmpty)) { val it = CrossProductIterator(values, expectedProperties) while (it.hasNext) { - val query = updateTask.compiledTemplate.generate(it.next(), taskProperties) + val query = updateTask.compiledTemplate.generate(it.next(), taskProperties, templateVariables) batchEmitter.update(query) } } @@ -75,7 +77,8 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT outputTask: Option[Task[_ <: TaskSpec]] = None) (implicit pluginContext: PluginContext): Unit = { val 
taskProperties = TaskProperties.create(inputTask = inputTask, outputTask = outputTask, pluginContext = pluginContext) - val query = updateTask.compiledTemplate.generate(Map.empty, taskProperties) + val templateVariables = pluginContext.templateVariables.all.variables + val query = updateTask.compiledTemplate.generate(Map.empty, taskProperties, templateVariables) batchEmitter.update(query) } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index 2cd2113402..fba6cb66f9 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -34,8 +34,9 @@ case class SparqlSelectCustomTask( @Param( label = "Select query", value = "A SPARQL 1.1 select query. The query supports Jinja templating. " + - "Properties of the input and output tasks can be accessed via the 'inputProperties' and 'outputProperties' objects. " + - "Example: SELECT * WHERE { GRAPH {{ inputProperties.uri(\"graph\") }} { ?s ?p ?o } }", + "Parameters of the connected input and output tasks can be accessed via 'input.config.' and 'output.config.'. " + + "Project and global template variables are available as 'project.' and 'global.'. 
" + + "Example: SELECT * WHERE { GRAPH <{{ input.config.graph }}> { ?s ?p ?o } }", example = "select * where { ?s ?p ?o }") selectQuery: SparqlCodeParameter, @Param(label = "Result limit", value = "If set to a positive integer, the number of results is limited") diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index 248c6c747c..3606ef6c4b 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -23,7 +23,7 @@ case class SparqlUpdateCustomTask( value = "The SPARQL UPDATE template for constructing SPARQL UPDATE queries for every entity from the input." + " The possible values for the template engine are `Jinja` (default), `Simple` and `Velocity Engine`." 
+ " See the general documentation of this plugin for further details on the features of each template engine.", - example = "INSERT DATA { {{ row.uri(\"PROP_FROM_ENTITY_SCHEMA1\") }} rdf:label {{ row.plainLiteral(\"PROP_FROM_ENTITY_SCHEMA2\") }} } ;" + example = "INSERT DATA { <{{ input.entity.subject }}> rdfs:label \"{{ input.entity.label }}\" } ;" ) sparqlUpdateTemplate: SparqlCodeParameter, @Param(label = "Batch size", value = "How many entities should be handled in a single update request.") diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala new file mode 100644 index 0000000000..7e481e362b --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala @@ -0,0 +1,95 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.silkframework.entity.EntitySchema +import org.silkframework.entity.paths.UntypedPath +import org.silkframework.execution.local.EmptyEntityTable +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateVariableName, TemplateVariableValue} + +import java.io.StringWriter + +/** + * SPARQL template implementation for the Jinja engine. + * + * Exposes the following variables: + * + * {{ input.config. }} -- parameter of the connected input task + * {{ input.entity. }} -- value of the current input entity + * {{ output.config. }} -- parameter of the connected output task + * {{ project. }} -- project-scoped template variable + * {{ global. }} -- global template variable + * + * Entity property names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); bracket-subscript + * access is not supported. 
SPARQL-rendering filters (`| uri`, `| plainLiteral`, `| rawUnsafe`, `| exists`) + * are not implemented in this iteration and are a follow-up. + */ +class SparqlJinjaTemplate(template: CompiledTemplate) extends SparqlTemplate { + + import SparqlJinjaTemplate._ + + override def generate(placeholderAssignments: Map[String, String], + taskProperties: TaskProperties, + templateVariables: Seq[TemplateVariableValue] = Seq.empty): String = { + val values = buildValues(placeholderAssignments, taskProperties, templateVariables) + val writer = new StringWriter() + template.evaluate(values, writer) + writer.toString + } + + override def generateWithDefaults(): String = { + // Seed every referenced variable with a URI-like default so that QueryFactory can parse the result. + val genericUri = "urn:generic:1" + val defaults = referencedVariables.distinct.map(v => new TemplateVariableValue(v.name, v.scope, Seq(genericUri))) + val writer = new StringWriter() + template.evaluate(defaults, writer) + writer.toString + } + + override def validateUpdateQuery(batchSize: Int): Unit = { + // Without SPARQL rendering filters (| uri, | plainLiteral, ...), every entity reference inserts the + // raw string value with no escaping. Generating meaningful example queries for parse validation is + // therefore not possible in the current iteration. Skip validation until the filter follow-up lands. 
+ } + + override def inputSchema: EntitySchema = { + val properties = entityPropertyNames + if (properties.isEmpty) { + EmptyEntityTable.schema + } else { + EntitySchema("", properties.map(p => UntypedPath(p).asUntypedValueType).toIndexedSeq) + } + } + + override def isStaticTemplate: Boolean = { + entityPropertyNames.isEmpty + } + + private def buildValues(placeholderAssignments: Map[String, String], + taskProperties: TaskProperties, + templateVariables: Seq[TemplateVariableValue]): Seq[TemplateVariableValue] = { + val inputConfig = taskProperties.inputTask.map { case (k, v) => + new TemplateVariableValue(k, INPUT_CONFIG_SCOPE, Seq(v)) + } + val inputEntity = placeholderAssignments.map { case (k, v) => + new TemplateVariableValue(k, INPUT_ENTITY_SCOPE, Seq(v)) + } + val outputConfig = taskProperties.outputTask.map { case (k, v) => + new TemplateVariableValue(k, OUTPUT_CONFIG_SCOPE, Seq(v)) + } + (inputConfig ++ inputEntity ++ outputConfig).toSeq ++ templateVariables + } + + private def referencedVariables: Seq[TemplateVariableName] = { + template.variables.getOrElse(Seq.empty) + } + + private def entityPropertyNames: Seq[String] = { + referencedVariables.filter(_.scope == INPUT_ENTITY_SCOPE).map(_.name).distinct + } +} + +object SparqlJinjaTemplate { + + private[templating] final val INPUT_CONFIG_SCOPE: Seq[String] = Seq("input", "config") + private[templating] final val INPUT_ENTITY_SCOPE: Seq[String] = Seq("input", "entity") + private[templating] final val OUTPUT_CONFIG_SCOPE: Seq[String] = Seq("output", "config") +} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala new file mode 100644 index 0000000000..e49015e2f6 --- /dev/null +++ 
b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala @@ -0,0 +1,180 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.apache.jena.update.UpdateFactory +import org.silkframework.entity.EntitySchema +import org.silkframework.entity.paths.UntypedPath +import org.silkframework.execution.local.EmptyEntityTable +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlLegacyTemplate._ +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateMethodUsage, TemplateVariableName, TemplateVariableValue} +import org.silkframework.runtime.validation.ValidationException + +import java.io.StringWriter +import scala.util.{Failure, Success, Try} + +/** + * SPARQL template implementation for the Velocity and Simple template engines. + * + * Exposes input entity values via a `row` object, and the connected input/output task parameters via + * `inputProperties` / `outputProperties` objects. All three objects offer the methods defined on + * [[TemplateValueAccessApi]] (`uri`, `plainLiteral`, `rawUnsafe`, `exists`). 
+ */ +class SparqlLegacyTemplate(template: CompiledTemplate) extends SparqlTemplate { + + override def generate(placeholderAssignments: Map[String, String], + taskProperties: TaskProperties, + templateVariables: Seq[TemplateVariableValue] = Seq.empty): String = { + val values = scala.collection.mutable.LinkedHashMap[String, AnyRef]() + // Flat entity values (used by simple template engine) + placeholderAssignments.foreach { case (k, v) => values(k) = v } + // SPARQL context objects + values(ROW_VAR_NAME) = Row(placeholderAssignments) + values(INPUT_PROPERTIES_VAR_NAME) = InputProperties(taskProperties.inputTask) + values(OUTPUT_PROPERTIES_VAR_NAME) = OutputProperties(taskProperties.outputTask) + val writer = new StringWriter() + template.evaluate(values.toMap, writer) + writer.toString + } + + override def generateWithDefaults(): String = { + val genericUri = "urn:generic:1" + val entityVariables = entityVariableNames + val assignments = entityVariables.map(_ -> genericUri).toMap + val inputPropVars = taskPropertyVariableNames(Seq(INPUT_PROPERTIES_VAR_NAME)).map(_ -> genericUri).toMap + val outputPropVars = taskPropertyVariableNames(Seq(OUTPUT_PROPERTIES_VAR_NAME)).map(_ -> genericUri).toMap + val taskProps = TaskProperties(inputPropVars, outputPropVars) + Try(generate(assignments, taskProps)) match { + case Failure(exception) => + throw new ValidationException( + "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) + case Success(value) => value + } + } + + override def validateUpdateQuery(batchSize: Int): Unit = { + if (usesRawUnsafe) { + // We cannot generate meaningful example values for the template if $row.rawUnsafe() is used, because it could generate arbitrary SPARQL syntax. 
+ } else { + // Generate example input assignments + val sparqlQuery = generateWithDefaults() + Try(UpdateFactory.create(sparqlQuery)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + + parseError.getMessage + ", example query: " + sparqlQuery) + } + if (batchSize > 1) { + val batchSparql = sparqlQuery + "\n" + sparqlQuery + Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + + parseError.getMessage + ", example batch query: " + batchSparql) + } + } + } + } + + override def inputSchema: EntitySchema = { + val properties = entityVariableNames + if (properties.isEmpty) { + EmptyEntityTable.schema + } else { + EntitySchema("", properties.map(p => UntypedPath(p).asUntypedValueType).toIndexedSeq) + } + } + + override def isStaticTemplate: Boolean = { + sparqlVariables match { + case Some(vars) => vars.isEmpty + case None => false + } + } + + /** SPARQL-specific method names that accept a string parameter representing an input path. */ + private val sparqlMethodNames = Set("uri", "plainLiteral", "rawUnsafe", "exists") + + /** Returns SPARQL-specific variables, extracting paths from method usages. 
*/ + private lazy val sparqlVariables: Option[Seq[TemplateVariableName]] = { + val usages = templatingVariables.flatMap(v => template.methodUsages(v)) + if (usages.nonEmpty) { + val rowVars = sparqlMethodUsages(ROW_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue)) + val inputPropVars = sparqlMethodUsages(INPUT_PROPERTIES_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, Seq(INPUT_PROPERTIES_VAR_NAME))) + val outputPropVars = sparqlMethodUsages(OUTPUT_PROPERTIES_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, Seq(OUTPUT_PROPERTIES_VAR_NAME))) + Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) + } else { + template.variables + } + } + + /** Returns method usages on the given variable filtered to SPARQL-specific methods. */ + private def sparqlMethodUsages(variableName: String): Seq[TemplateMethodUsage] = { + template.methodUsages(variableName).filter(u => sparqlMethodNames.contains(u.methodName)) + } + + /** Checks if any SPARQL templating variable uses the rawUnsafe method. */ + private lazy val usesRawUnsafe: Boolean = { + templatingVariables.exists(varName => + sparqlMethodUsages(varName).exists(_.methodName == "rawUnsafe")) + } + + /** Returns entity variable names (those with empty scope). */ + private def entityVariableNames: Seq[String] = { + sparqlVariables match { + case Some(vars) => + vars.filter(_.scope.isEmpty).map(_.name).distinct + case None => + Seq.empty + } + } + + /** Returns variable names for a specific scope (e.g. "inputProperties", "outputProperties"). 
*/ + private def taskPropertyVariableNames(scope: Seq[String]): Seq[String] = { + sparqlVariables match { + case Some(vars) => + vars.filter(_.scope == scope).map(_.name).distinct + case None => + Seq.empty + } + } +} + +object SparqlLegacyTemplate { + + private[templating] final val ROW_VAR_NAME = "row" + private[templating] final val INPUT_PROPERTIES_VAR_NAME = "inputProperties" + private[templating] final val OUTPUT_PROPERTIES_VAR_NAME = "outputProperties" + + private final val templatingVariables = Seq(ROW_VAR_NAME, INPUT_PROPERTIES_VAR_NAME, OUTPUT_PROPERTIES_VAR_NAME) + + /** Row API. Represents a single row where input paths are either exactly one value or empty. + * + * Available in templates as the `row` variable. + * + * Examples (Velocity): + * + *

+    *   $row.uri("urn:prop:uriProp") ## Renders the value as a URI, e.g. <urn:prop:uriProp>
+    *   $row.plainLiteral("urn:prop:stringProp") ## Renders the value as a plain literal, e.g. "Quotes \" are escaped"
+    *   $row.rawUnsafe("urn:prop:trustedValuesOnly") ## Puts the value as-is; UNSAFE, prone to injection.
+    *   #if ( $row.exists("urn:prop:valueMightNotExist") )
+    *     $row.plainLiteral("urn:prop:valueMightNotExist")
+    *   #end
+    * </pre>
+ * + * @param inputValues Map of available input values. Paths without a value are absent. + */ + case class Row(inputValues: Map[String, String]) extends TemplateValueAccessApi { + override val templateVarName: String = ROW_VAR_NAME + } + + /** Similar to Row, but for the input task properties. */ + case class InputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { + override val templateVarName: String = INPUT_PROPERTIES_VAR_NAME + } + + /** Similar to Row, but for the output task properties. */ + case class OutputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { + override val templateVarName: String = OUTPUT_PROPERTIES_VAR_NAME + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index 0a466d4143..e2456ae5b6 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -1,201 +1,69 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating -import org.apache.jena.update.UpdateFactory import org.silkframework.config.{Prefixes, Task, TaskSpec} import org.silkframework.entity.EntitySchema -import org.silkframework.entity.paths.UntypedPath -import org.silkframework.execution.local.EmptyEntityTable -import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlTemplate.{INPUT_PROPERTIES_VAR_NAME, InputProperties, OUTPUT_PROPERTIES_VAR_NAME, OutputProperties, ROW_VAR_NAME, Row} import org.silkframework.runtime.plugin.PluginContext -import org.silkframework.runtime.templating.{CompiledTemplate, TemplateEngines, TemplateMethodUsage, TemplateVariableName} -import 
org.silkframework.runtime.validation.ValidationException - -import java.io.StringWriter -import scala.util.{Failure, Success, Try} +import org.silkframework.runtime.templating.{TemplateEngines, TemplateVariableValue} /** - * Wraps a [[CompiledTemplate]] and adds SPARQL Update specific capabilities. + * Compiled SPARQL template. Encapsulates rendering a SPARQL query from a template and the associated + * context (connected input/output task properties, current entity values, project/global variables). + * + * Two concrete implementations exist: + * + * - [[SparqlJinjaTemplate]] for the Jinja engine, which exposes variables as `input.config.*`, + * `input.entity.*`, `output.config.*`, `project.*` and `global.*`. + * - [[SparqlLegacyTemplate]] for the Velocity and Simple engines, which exposes the historical + * `row` / `inputProperties` / `outputProperties` object API. */ -class SparqlTemplate(template: CompiledTemplate) { +trait SparqlTemplate { /** - * Renders the template based on the variable assignments. + * Renders the template. * - * @param placeholderAssignments For each placeholder in the query template. - * @param taskProperties The input and output task properties. - * */ - def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { - val values = scala.collection.mutable.LinkedHashMap[String, AnyRef]() - // Flat entity values (used by simple template engine) - placeholderAssignments.foreach { case (k, v) => values(k) = v } - // SPARQL context objects - values(ROW_VAR_NAME) = Row(placeholderAssignments) - values(INPUT_PROPERTIES_VAR_NAME) = InputProperties(taskProperties.inputTask) - values(OUTPUT_PROPERTIES_VAR_NAME) = OutputProperties(taskProperties.outputTask) - val writer = new StringWriter() - template.evaluate(values.toMap, writer) - writer.toString - } - - /** - * Renders the template based assigning default values for all variables. 
+ * @param placeholderAssignments Values from the current input entity, keyed by the entity path. + * @param taskProperties Parameter values of the connected input and output tasks. + * @param templateVariables Project and global template variables (scoped as `Seq("project")` / `Seq("global")`). + * Only used by the Jinja implementation; the legacy implementation ignores them. */ - def generateWithDefaults(): String = { - val genericUri = "urn:generic:1" - val entityVariables = entityVariableNames - val assignments = entityVariables.map(_ -> genericUri).toMap - val inputPropVars = taskPropertyVariableNames(Seq("inputProperties")).map(_ -> genericUri).toMap - val outputPropVars = taskPropertyVariableNames(Seq("outputProperties")).map(_ -> genericUri).toMap - val taskProps = TaskProperties(inputPropVars, outputPropVars) - Try(generate(assignments, taskProps)) match { - case Failure(exception) => - throw new ValidationException( - "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) - case Success(value) => value - } - } + def generate(placeholderAssignments: Map[String, String], + taskProperties: TaskProperties, + templateVariables: Seq[TemplateVariableValue] = Seq.empty): String - /** Validates the template, including batch validation if batchSize > 1. */ - def validateUpdateQuery(batchSize: Int): Unit = { - if (usesRawUnsafe) { - // We cannot generate meaningful example values for the template if $row.rawUnsafe() is used, because it could generate arbitrary SPARQL syntax. - } else { - // Generate example input assignments - val sparqlQuery = generateWithDefaults() - Try(UpdateFactory.create(sparqlQuery)).failed.toOption.foreach { parseError => - throw new ValidationException( - "The SPARQL Update template does not generate valid SPARQL Update queries. 
Error message: " + - parseError.getMessage + ", example query: " + sparqlQuery) - } - if (batchSize > 1) { - val batchSparql = sparqlQuery + "\n" + sparqlQuery - Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => - throw new ValidationException( - "The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + - parseError.getMessage + ", example batch query: " + batchSparql) - } - } - } - } + /** Renders the template with example values for every variable. Used to derive schemas and validate queries. */ + def generateWithDefaults(): String - /** The input entity schema that is expected by the template. */ - def inputSchema: EntitySchema = { - val properties = entityVariableNames - if (properties.isEmpty) { - EmptyEntityTable.schema - } else { - EntitySchema("", properties.map(p => UntypedPath(p).asUntypedValueType).toIndexedSeq) - } - } + /** Validates the template and, if batchSize > 1, that batching produces valid SPARQL. */ + def validateUpdateQuery(batchSize: Int): Unit - /** True if the given template is static, i.e. contains no placeholder variables. */ - def isStaticTemplate: Boolean = { - sparqlVariables match { - case Some(vars) => vars.isEmpty - case None => false - } - } - - /** SPARQL-specific method names that accept a string parameter representing an input path. */ - private val sparqlMethodNames = Set("uri", "plainLiteral", "rawUnsafe", "exists") - - /** Returns SPARQL-specific variables, extracting paths from method usages. 
*/ - private lazy val sparqlVariables: Option[Seq[TemplateVariableName]] = { - val usages = SparqlTemplate.templatingVariables.flatMap(v => template.methodUsages(v)) - if (usages.nonEmpty) { - val rowVars = sparqlMethodUsages(SparqlTemplate.ROW_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue)) - val inputPropVars = sparqlMethodUsages(INPUT_PROPERTIES_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue, Seq("inputProperties"))) - val outputPropVars = sparqlMethodUsages(OUTPUT_PROPERTIES_VAR_NAME) - .map(u => new TemplateVariableName(u.parameterValue, Seq("outputProperties"))) - Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) - } else { - template.variables - } - } - - /** Returns method usages on the given variable filtered to SPARQL-specific methods. */ - private def sparqlMethodUsages(variableName: String): Seq[TemplateMethodUsage] = { - template.methodUsages(variableName).filter(u => sparqlMethodNames.contains(u.methodName)) - } - - /** Checks if any SPARQL templating variable uses the rawUnsafe method. */ - private lazy val usesRawUnsafe: Boolean = { - SparqlTemplate.templatingVariables.exists(varName => - sparqlMethodUsages(varName).exists(_.methodName == "rawUnsafe")) - } + /** Entity schema that the template expects on its input port. */ + def inputSchema: EntitySchema - /** Returns entity variable names (those with empty scope). */ - private def entityVariableNames: Seq[String] = { - sparqlVariables match { - case Some(vars) => - vars.filter(_.scope.isEmpty).map(_.name).distinct - case None => - Seq.empty - } - } - - /** Returns variable names for a specific scope (e.g. "inputProperties", "outputProperties"). */ - private def taskPropertyVariableNames(scope: Seq[String]): Seq[String] = { - sparqlVariables match { - case Some(vars) => - vars.filter(_.scope == scope).map(_.name).distinct - case None => - Seq.empty - } - } + /** True if the template does not reference any entity values and thus needs no input port. 
*/ + def isStaticTemplate: Boolean } object SparqlTemplate { - private final val ROW_VAR_NAME = "row" - private final val INPUT_PROPERTIES_VAR_NAME = "inputProperties" - private final val OUTPUT_PROPERTIES_VAR_NAME = "outputProperties" - - private final val templatingVariables = Seq(ROW_VAR_NAME, INPUT_PROPERTIES_VAR_NAME, OUTPUT_PROPERTIES_VAR_NAME) + // Must match JinjaTemplateEngine.id. Duplicated here because silk-plugins-rdf does not depend on + // silk-plugins-templating-jinja at compile time (only at test scope). + private final val JINJA_ENGINE_ID = "jinja" /** - * Creates a SPARQL template from a string. + * Creates a SPARQL template using the given template engine. */ def create(templateEngineId: String, template: String): SparqlTemplate = { - val templateEngine = TemplateEngines.create(templateEngineId) - new SparqlTemplate(templateEngine.compile(template)) - } - - /** Row API used in SPARQL templates. Represents a single row where input paths are either exactly one value or empty. - * - * The Row object will be available in Velocity templates as 'row' variable. - * - * Examples: - * - *
-   *   $row.uri("urn:prop:uriProp") ## Renders the value of the input path as URI, e.g. 
-   *   $row.plainLiteral("urn:prop:stringProp") ## Renders the value of the input paths as plain string, e.g. "Quotes \" are escaped"
-   *   $row.rawUnsafe("urn:prop:trustedValuesOnly") ## Puts the value as it is into the rendered template. This is UNSAFE and prone to injection attacks.
-   *   #if ( $row.exists("urn:prop:valueMightNotExist") ) ## Checks if a value exists for the input path, i.e. values can always be optional.
-   *     $row.plainLiteral("urn:prop:valueMightNotExist") ## If no value exists for the input path then this would throw an exception
-   *   #end
-   * 
- * - * @param inputValues The map of existing input values, i.e. values that were defined by input paths, but where no value was available are not set. - */ - case class Row(inputValues: Map[String, String]) extends TemplateValueAccessApi { - override val templateVarName: String = ROW_VAR_NAME - } - - /** Similar to Row, but for the input task properties. */ - case class InputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { - override val templateVarName: String = INPUT_PROPERTIES_VAR_NAME - } - - /** Similar to Row, but for the output task properties. */ - case class OutputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { - override val templateVarName: String = OUTPUT_PROPERTIES_VAR_NAME + val engine = TemplateEngines.create(templateEngineId) + val compiled = engine.compile(template) + if (templateEngineId == JINJA_ENGINE_ID) { + new SparqlJinjaTemplate(compiled) + } else { + new SparqlLegacyTemplate(compiled) + } } } -/** Makes properties of the input and output task of a SPARQL Update operator execution available. */ +/** Makes properties of the input and output task of a SPARQL operator execution available to the template. 
*/ case class TaskProperties(inputTask: Map[String, String], outputTask: Map[String, String]) object TaskProperties { @@ -205,7 +73,7 @@ object TaskProperties { pluginContext: PluginContext): TaskProperties = { // It's obligatory to have empty prefixes here, since we do not want to have prefixed URIs for URI parameters implicit val updatedPluginContext: PluginContext = PluginContext.updatedPluginContext(pluginContext, prefixes = Some(Prefixes.empty)) - val inputProperties = createTaskProperties(inputTask) + val inputProperties = createTaskProperties(inputTask) val outputProperties = createTaskProperties(outputTask) TaskProperties(inputProperties, outputProperties) } @@ -214,6 +82,4 @@ object TaskProperties { (implicit pluginContext: PluginContext): Map[String, String] = { task.toSeq.flatMap(_.parameters.toStringMap).toMap } - - -} \ No newline at end of file +} diff --git a/silk-plugins/silk-plugins-rdf/src/test/resources/org/silkframework/plugins/dataset/rdf/sparqlUpdateProject.zip b/silk-plugins/silk-plugins-rdf/src/test/resources/org/silkframework/plugins/dataset/rdf/sparqlUpdateProject.zip index af082b443de35e08fc06cdc1b675ad037c4177c4..a028a08b33fbf9e62ea28b3cc0cc37b923733648 100644 GIT binary patch delta 1244 zcmaDEel>i%>_d8?h8zQ#0 z8OFyM{+zm0_~_;&=lC5CC*>-8E-ubG@a}}WOz^Ul9bbP}_&hz5`MNaGqPpUCa+T|< z<+FmWioJbZ?_GFw8i!!+8r7DH$q(gI!jDdj=ljGe`z*o0UU5!9p7A%vnx~ORW^yvG zSv={^q==(OH%OOHI=3OKXOP`ah{!}e%Jo%`#9l<#S54sLzl1E6{aA5 z&c)d1_1gfc$nXf(gPLV0Z!l!-$oybow5H9y$8FDK-oMGB$zLodf8Y1d&U^3Qc=r8Q z*PZP#FW+{y=I^uiwTJ$)2lmPbcr&wzFfcH1Fn~f%B=N9rC@|zcGchpOO%~)5pPa`e zJb90bc>U5z_x%qU2pqrvQ~Q;Wob2QuCa;B9l1vu4JnL?0-*opuL3Z|a!3l5bzq6fK ze`G>NvFGJ`%kO9J55K2Zw0cv(6M3G;(dyR^)hx@m5)+dPJm)g?rS#H+5iNHT%T_*= zs9rc{!zl~K_l=^miLL?3w-3%gB5|p~-0XARe&))Qfas7(^_$vP+g=VfRO2YT@MYHZ z>GwokUsV3HiaG!JUS)E7j{n7T=VdG6B_r*vbtyE7NCG0V{G{0eu zOw*0)DJci$1ef`(Ief=HoO9)&u!1EiO~U40jx(y2zkJ~nN?owKVa1A=zwWKPyLBfd 
zwUw!dZJI5$lV#E@hjTq{S59qxef(K}Ywt4VKUS|RKAW@d&XhdGY5Hf)nKi$5vbRYy8RrSG-5X9%$7vHn6}uHu=eE+5(f^IRtoM z8n!hmb3i4fC%173O=brw;8K$S>UzKll#rXen_GBtxSGIZDRq$GH!iSXh`P*V7A|CC z^m)J%23)+8f2j%p0|ume7cWpyW%6=0eIUn!AILG7%%v_cxd3d|eSyh>g2t2gt7`yd z>xCw76w=iR@MdHZVSpracwj;us{wN~*q4ulff^Jhr)U@fb!&-CR+Lbfyi3Cd$Tcyptzt2eExu0=W(VjXdRy delta 1302 zcmcZ_{xW>SL}uQk?DCk*-@F>yj0_C=ljkv8*E1Ok?ES2rB`jpaF|{J^^HZPs_<_ZYt83CKHP|17`d5lxyYcPBzF7`Jr^23o|0uU(rR!tE%Pn&EzRhtI zd(5NAcY9H7eW{|}NhQUmOS7Ch!f*asnzLDB(`-jO&DBB6LVu+{;0T&L;d#znHAcl? zB?)f6H(VY^Z?3rg+GTEFT)IWF+3%b~Qgh3B-~F&$Eq>+IyTpp$G8=h&pX!|VzIG|e zRg+6?dKi20h1pjfWF%(KEI7XS*^1vUIK8E6LRCfH>~iCEoGj<^%24idm>1)5E45Sp z_uGEd7XA2EFIuxy?(3Yg@=a%wKEG;T8ueRW=pRFXH#3U}0|NsG11J#pDE#$E00tr- zGXsO&*oev^gCoAQu|!{m5@-4fZE5*bE+IiG8{V3h;#C%?mlpI?dsfi zjhp+c%?~hkO6ATnd2MO?{o9;#xyx^pj*7Fk^y;L-s z7wD$lQbm>Wj;kKi%&3TI;Z8@X2V|uob=QB5zrxUhVO^dyjv2zuoD~oY*OG>$%fb z6*!+ZdbZ5V!dPIZt$@{}m@j2ESqw?8{Je>QXQT4?uKZs$Wr^FeWj)V5%sLuYOCNpu zM`~GT(>tyvlkY#~KMnX3qI*cfcjcUor@1USnwXR>A4~4q>G8y12B-PKnhyE>%P#)$ zJDBsx#c=WQ53A28)nEA?)ccA5gB|1MWo#bIn{9cz88^S<6l0vcQcYm<4PH^^%`DuO zjGNyI{AChm07noorrP#>4$$CXVrH3~pdq!{L*zOi69dC!MP)Bw9DV>Q zQJJjI$TfM6vIYwSgALPUK~_yYy^7qN0B=Sn5e7um!}AYNw*lM^5M#2uih}}1>OfJ# zzzQ_iVe)Tu_Qjc=8)n0bo#oB&PF#CHmC)CcjkE2MVb2!37-DWhVatYv02U)vhu* zMjfQ>s{mA$E08l)2+Vn_uBe11Fj4&TLl~+Q6t+e{y>=pCfjJs(K+Zh~=Z}U9kdq<~ zmWdJJnyf3qIXPF;7%0dpfg~t2Swd4_@=ZwrZiF58Co^h=02R)Y1}n_hasqO)WT70f s$@64|z+vSfFTjJ) { ?s ?p ?o } }""" val task = SparqlSelectCustomTask(query) var capturedQuery = "" val activityContextMock = TestMocks.activityContextMock() @@ -76,7 +76,7 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec task.outputSchema.typedPaths.map(_.toUntypedPath.normalizedSerialization) mustBe IndexedSeq("s", "p", "o") capturedQuery must include(s"<$graphUri>") - capturedQuery must not include "inputProperties.uri" + capturedQuery must not include "input.config.graph" } private def taskWithEndpoint(sparqlEndpoint: SparqlEndpoint, graphUri: Option[String] = None): 
Task[DatasetSpec[RdfDataset]] = { diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala index b10954b6e0..17cc894c66 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala @@ -1,124 +1,84 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating -import org.apache.jena.vocabulary.XSD -import org.silkframework.plugins.templating.jinja.JinjaTemplateEngine -import org.silkframework.runtime.validation.ValidationException import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers -import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException +import org.silkframework.plugins.templating.jinja.JinjaTemplateEngine +import org.silkframework.runtime.templating.TemplateVariableValue +import org.silkframework.runtime.templating.exceptions.UnboundVariablesException class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { behavior of "SPARQL templating with the Jinja Template Engine" - it should "output the correct input paths of the template" in { - val templateString = - """ - |{{ row.uri("subject") }} - |{% if row.exists("somePath") %} - | Plain: {{ row.plainLiteral("somePath") }} - | Raw: {{ row.rawUnsafe("trustedValuePath") }} - |{% endif %} - |""".stripMargin - val compiled = JinjaTemplateEngine().compile(templateString) - compiled.variables.get.map(_.name).sorted mustBe Seq("row") - } - - it should "validate without problems for valid templates" in { - val templateWithLogic = s"""PREFIX xsd: <${XSD.getURI}> - |INSERT DATA { - | "entity 1" . 
- | {% if row.exists("input1") %} - | {{ row.uri("input1") }} {{ row.plainLiteral("input2") }}^^xsd:string - | {% endif %} - |}; - |""".stripMargin - - JinjaTemplateEngine().compile(templateWithLogic).variables.get.map(_.name).sorted mustBe Seq("row") - validate(templateWithLogic) - } - - it should "always validate templates as correct if rawUnsafe() is used, because there is no way to generate meaningful examples to validate" in { - validate("""Completely broken SPARQL Update query with {{ row.rawUnsafe("something") }}""") - } - - it should "raise a validation error when the template is invalid" in { - intercept[ValidationException] { - validate("""DELETE DATA { unknownPrefix:label "test" } ;""") - } - intercept[ValidationException] { - validate( - """PREFIX foaf: - | - |WITH - |DELETE { ?person ?property ?value } - |WHERE { ?person ?property ?value ; foaf:givenName 'Fred } ;""".stripMargin) // Missing closing ' for literal - } - val batchTemplate = - s"""PREFIX xsd: <${XSD.getURI}> - |INSERT DATA { "hello" } ;""".stripMargin - validate(batchTemplate.dropRight(1), batchSize = 1) // Dropped ';' at the end, not batch supported, but batch size is 1 - intercept[ValidationException] { - validate(batchTemplate.dropRight(1)) // Dropped ';' at the end, not batch supported - } - } - - it should "render uri() values as SPARQL URI syntax" in { + it should "render values from the current input entity via input.entity" in { val result = generate( - """INSERT DATA { {{ row.uri("subject") }} "value" } ;""", - Map("subject" -> "urn:entity:1") + """INSERT DATA { <{{ input.entity.subject }}> "value" } ;""", + assignments = Map("subject" -> "urn:entity:1") ) result must include("") } - it should "render plainLiteral() values as escaped SPARQL literals" in { + it should "render parameters of the connected input task via input.config" in { val result = generate( - """INSERT DATA { {{ row.plainLiteral("label") }} } ;""", - Map("label" -> """hello "world"""") + """SELECT * WHERE { GRAPH 
<{{ input.config.graph }}> { ?s ?p ?o } }""", + taskProps = TaskProperties(Map("graph" -> "urn:graph:1"), Map.empty) ) - result must include(""""hello \"world\""""") + result must include("") } - it should "inject rawUnsafe() values verbatim without modification" in { - val rawValue = " " + it should "render parameters of the connected output task via output.config" in { val result = generate( - """INSERT DATA { {{ row.rawUnsafe("raw") }} } ;""", - Map("raw" -> rawValue) + """INSERT DATA { GRAPH <{{ output.config.graph }}> { } } ;""", + taskProps = TaskProperties(Map.empty, Map("graph" -> "urn:graph:out")) ) - result must include(rawValue) - } - - it should "include or exclude blocks based on exists()" in { - val template = - """INSERT DATA { {% if row.exists("x") %} "found" .{% endif %} } ;""" - generate(template, Map("x" -> "urn:entity:1")) must include("found") - generate(template, Map.empty) must not include "found" + result must include("") } - it should "render inputProperties and outputProperties via TaskProperties" in { + it should "render project and global template variables" in { + val project = new TemplateVariableValue("myVar", Seq("project"), Seq("projectValue")) + val global = new TemplateVariableValue("myVar", Seq("global"), Seq("globalValue")) val result = generate( - """INSERT DATA { {{ inputProperties.uri("x") }} {{ outputProperties.uri("y") }} } ;""", - assignments = Map.empty, - taskProps = TaskProperties(Map("x" -> "urn:input:1"), Map("y" -> "urn:output:1")) + """{{ project.myVar }} / {{ global.myVar }}""", + templateVariables = Seq(project, global) ) - result must include("") - result must include("") + result must include("projectValue / globalValue") } - it should "throw a TemplateExecutionException when uri() receives a non-URI value" in { - intercept[TemplateEvaluationException] { - generate( - """INSERT DATA { {{ row.uri("subject") }} "value" } ;""", - Map("subject" -> "not a uri") - ) + it should "reject old Jinja syntax (row, 
inputProperties, outputProperties)" in { + intercept[UnboundVariablesException] { + generate("""{{ row.uri("x") }}""", assignments = Map("x" -> "urn:a:b")) + } + intercept[UnboundVariablesException] { + generate("""{{ inputProperties.uri("graph") }}""", + taskProps = TaskProperties(Map("graph" -> "urn:g:1"), Map.empty)) } } - def generate(template: String, assignments: Map[String, String], taskProps: TaskProperties = TaskProperties(Map.empty, Map.empty)): String = { - new SparqlTemplate(JinjaTemplateEngine().compile(template)).generate(assignments, taskProps) + it should "derive the input schema from input.entity.* references" in { + val template = SparqlTemplate.create(JinjaTemplateEngine.id, + """ + |INSERT DATA { + | <{{ input.entity.subject }}> "{{ input.entity.label }}" . + | <{{ input.entity.subject }}> "value" . + |} ; + |""".stripMargin) + val paths = template.inputSchema.typedPaths.flatMap(_.property).map(_.propertyUri.uri) + paths.toSet mustBe Set("subject", "label") + } + + it should "report a Jinja template with no entity references as static" in { + val staticTemplate = SparqlTemplate.create(JinjaTemplateEngine.id, + """INSERT DATA { <{{ input.config.target }}> "v" } ;""") + staticTemplate.isStaticTemplate mustBe true + val dynamicTemplate = SparqlTemplate.create(JinjaTemplateEngine.id, + """INSERT DATA { <{{ input.entity.subject }}> "v" } ;""") + dynamicTemplate.isStaticTemplate mustBe false } - def validate(template: String, batchSize: Int = 2): Unit = { - new SparqlTemplate(JinjaTemplateEngine().compile(template)).validateUpdateQuery(batchSize) + private def generate(template: String, + assignments: Map[String, String] = Map.empty, + taskProps: TaskProperties = TaskProperties(Map.empty, Map.empty), + templateVariables: Seq[TemplateVariableValue] = Seq.empty): String = { + SparqlTemplate.create(JinjaTemplateEngine.id, template).generate(assignments, taskProps, templateVariables) } } diff --git 
a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala index 4744c4cf2f..0f4ba55c4a 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala @@ -79,7 +79,7 @@ class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { """SELECT * WHERE { | $row.uri("uriProp") rdfs:label $row.plainLiteral("stringProp") |}""".stripMargin - val template = new SparqlTemplate(VelocityTemplateEngine().compile(stringTemplate)) + val template = new SparqlLegacyTemplate(VelocityTemplateEngine().compile(stringTemplate)) for(i <- 1 to 10) { val rendered = template.generate(Map("uriProp" -> s"http://entity$i", "stringProp" -> s"some label $i"), TaskProperties(Map.empty, Map.empty)) rendered mustBe @@ -113,10 +113,10 @@ class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { } private def generate(templateString: String, bindings: Map[String, String]): String = { - new SparqlTemplate(VelocityTemplateEngine().compile(templateString)).generate(bindings, TaskProperties(Map.empty, Map.empty)) + new SparqlLegacyTemplate(VelocityTemplateEngine().compile(templateString)).generate(bindings, TaskProperties(Map.empty, Map.empty)) } def validate(template: String, batchSize: Int = 2): Unit = { - new SparqlTemplate(VelocityTemplateEngine().compile(template)).validateUpdateQuery(batchSize) + new SparqlLegacyTemplate(VelocityTemplateEngine().compile(template)).validateUpdateQuery(batchSize) } } diff --git a/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala 
b/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala index a1d33651e5..46845c36bd 100644 --- a/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala +++ b/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala @@ -94,7 +94,7 @@ class ProjectTaskApiTest extends AnyFlatSpec with SingleProjectWorkspaceProvider val datasetLabel = "In-memory dataset" val customLabel = "Custom SPARQL Update" val transformTask = "transformInContext" - project.addTask(customId, SparqlUpdateCustomTask("INSERT DATA { {{ row.uri(\"PROP_FROM_ENTITY_SCHEMA1\") }}

}"), MetaData(Some(customLabel))) + project.addTask(customId, SparqlUpdateCustomTask("INSERT DATA { <{{ input.entity.PROP_FROM_ENTITY_SCHEMA1 }}>

}"), MetaData(Some(customLabel))) project.addTask(datasetId, DatasetSpec(InMemoryDataset()), metaData = MetaData(Some(datasetLabel))) project.addTask(transformTask, TransformSpec(DatasetSelection(datasetId), output = IdentifierOptionParameter(Some(Identifier(customId))))) val TaskContextResponse(inputTasks, outputTasks, originalInputs, originalOutputs) = taskContext(projectId, transformTask, WorkflowTaskContext( From f6c6822482d190b28640636069ff81a943f2b820 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 22 Apr 2026 15:45:06 +0200 Subject: [PATCH 38/63] Add new SPARQL transformers: validate_uri() -> return plain and *valid IRI* without "<>", error if not valid escape_literal() -> Renders a parameter of the input task as *plain literal*, without enclosing quotation marks escape_multiline_literal() -> Renders a parameter of the input task as *literal*, escaping tripple single/double quotes --- .../TemplateVariableConversions.scala | 2 +- .../templating/SparqlTemplateJinjaTest.scala | 63 ++++++++++++++++++- .../rule/plugins/RulePlugins.scala | 5 ++ .../sparql/EscapeLiteralTransformer.scala | 52 +++++++++++++++ .../EscapeMultilineLiteralTransformer.scala | 55 ++++++++++++++++ .../sparql/ValidateUriTransformer.scala | 45 +++++++++++++ .../sparql/EscapeLiteralTransformerTest.scala | 5 ++ ...scapeMultilineLiteralTransformerTest.scala | 5 ++ .../sparql/ValidateUriTransformerTest.scala | 5 ++ 9 files changed, 235 insertions(+), 2 deletions(-) create mode 100644 silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformer.scala create mode 100644 silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformer.scala create mode 100644 silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformer.scala create mode 100644 silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformerTest.scala create mode 100644 
silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformerTest.scala create mode 100644 silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformerTest.scala diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala index 6ac9eb9d8e..36833baba2 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala @@ -26,7 +26,7 @@ object TemplateVariableConversions { * @param scope The base scope. Nested parameters extend this scope with the parameter key. */ def fromTask(task: Task[_ <: TaskSpec], scope: Seq[String] = Seq("task"))(implicit pluginContext: PluginContext): Seq[TemplateVariableValue] = { - fromPluginParameters(task.data.parameters, scope :+ "parameters") + fromPluginParameters(task.data.parameters, scope) } private def fromPluginParameters(values: ParameterValues, scope: Seq[String] = Seq.empty)(implicit pluginContext: PluginContext): Seq[TemplateVariableValue] = { diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala index 17cc894c66..1a25edda46 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala @@ -4,7 +4,7 @@ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers import 
org.silkframework.plugins.templating.jinja.JinjaTemplateEngine import org.silkframework.runtime.templating.TemplateVariableValue -import org.silkframework.runtime.templating.exceptions.UnboundVariablesException +import org.silkframework.runtime.templating.exceptions.{TemplateEvaluationException, UnboundVariablesException} class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { @@ -75,6 +75,67 @@ class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { dynamicTemplate.isStaticTemplate mustBe false } + it should "render a realistic SPARQL Update template combining all variable scopes, filters and a conditional" in { + val templateString = + """PREFIX rdfs: + |WITH <{{ output.config.graph | validate_uri }}> + |DELETE { <{{ input.entity.subject | validate_uri }}> ?p ?o } + |INSERT { + | <{{ input.entity.subject | validate_uri }}> rdfs:label "{{ input.entity.label | escape_literal }}" . + | <{{ input.entity.subject | validate_uri }}> <{{ project.labelProp | validate_uri }}> "{{ global.author | escape_literal }}" . + | {% if input.entity.comment %} + | <{{ input.entity.subject | validate_uri }}> rdfs:comment '''{{ input.entity.comment | escape_multiline_literal }}''' . 
+ | {% endif %} + |} + |WHERE { <{{ input.entity.subject | validate_uri }}> ?p ?o } ; + |""".stripMargin + val template = SparqlTemplate.create(JinjaTemplateEngine.id, templateString) + + template.inputSchema.typedPaths.flatMap(_.property).map(_.propertyUri.uri).toSet mustBe + Set("subject", "label", "comment") + template.isStaticTemplate mustBe false + + val taskProps = TaskProperties(inputTask = Map.empty, outputTask = Map("graph" -> "urn:graph:out")) + val projectAndGlobal = Seq( + new TemplateVariableValue("labelProp", Seq("project"), Seq("urn:prop:label")), + new TemplateVariableValue("author", Seq("global"), Seq("Jane")) + ) + + val rendered = template.generate( + placeholderAssignments = Map( + "subject" -> "urn:entity:1", + "label" -> """O'Reilly & "friends"""", + "comment" -> "has ''' triple quotes" + ), + taskProperties = taskProps, + templateVariables = projectAndGlobal + ) + + rendered must include("WITH ") + rendered must include("") + rendered must include(""""O\'Reilly & \"friends\""""") + rendered must include("") + rendered must include(""""Jane"""") + rendered must include("""has \'\'\' triple quotes""") + + // With an empty `comment`, the {% if %} branch is skipped. + val withoutComment = template.generate( + placeholderAssignments = Map("subject" -> "urn:entity:1", "label" -> "plain", "comment" -> ""), + taskProperties = taskProps, + templateVariables = projectAndGlobal + ) + withoutComment must not include "rdfs:comment" + + // An invalid IRI piped through validate_uri surfaces as a TemplateEvaluationException. 
+ intercept[TemplateEvaluationException] { + template.generate( + placeholderAssignments = Map("subject" -> "not a uri", "label" -> "plain", "comment" -> ""), + taskProperties = taskProps, + templateVariables = projectAndGlobal + ) + } + } + private def generate(template: String, assignments: Map[String, String] = Map.empty, taskProps: TaskProperties = TaskProperties(Map.empty, Map.empty), diff --git a/silk-rules/src/main/scala/org/silkframework/rule/plugins/RulePlugins.scala b/silk-rules/src/main/scala/org/silkframework/rule/plugins/RulePlugins.scala index c754317419..00e05c7631 100644 --- a/silk-rules/src/main/scala/org/silkframework/rule/plugins/RulePlugins.scala +++ b/silk-rules/src/main/scala/org/silkframework/rule/plugins/RulePlugins.scala @@ -38,6 +38,7 @@ import org.silkframework.rule.plugins.transformer.numeric._ import org.silkframework.rule.plugins.transformer.replace.{MapTransformer, MapTransformerWithDefaultInput, RegexReplaceTransformer, ReplaceTransformer} import org.silkframework.rule.plugins.transformer.selection.{CoalesceTransformer, RegexSelectTransformer} import org.silkframework.rule.plugins.transformer.sequence.{GetValueByIndexTransformer, SortTransformer, ValuesToIndexesTransformer} +import org.silkframework.rule.plugins.transformer.sparql.{EscapeLiteralTransformer, EscapeMultilineLiteralTransformer, ValidateUriTransformer} import org.silkframework.rule.plugins.transformer.substring._ import org.silkframework.rule.plugins.transformer.tokenization.{CamelCaseTokenizer, Tokenizer} import org.silkframework.rule.plugins.transformer.validation._ @@ -148,6 +149,10 @@ class RulePlugins extends PluginModule { // Selection classOf[RegexSelectTransformer] :: classOf[CoalesceTransformer] :: + // SPARQL + classOf[ValidateUriTransformer] :: + classOf[EscapeLiteralTransformer] :: + classOf[EscapeMultilineLiteralTransformer] :: Nil private def measures: List[Class[_ <: AnyPlugin]] = diff --git 
a/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformer.scala b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformer.scala new file mode 100644 index 0000000000..14b233f24f --- /dev/null +++ b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformer.scala @@ -0,0 +1,52 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.annotations.{TransformExample, TransformExamples} +import org.silkframework.rule.input.SimpleTransformer +import org.silkframework.runtime.plugin.annotations.Plugin + +@Plugin( + id = "escape_literal", + categories = Array("SPARQL"), + label = "Escape SPARQL plain literal", + description = "Escapes a value so it can be safely used inside a SPARQL short-form string literal. " + + "Escapes backslashes, quotes, newlines, carriage returns and tabs. " + + "The returned value does not include enclosing quotation marks." 
+) +@TransformExamples(Array( + new TransformExample( + input1 = Array("simple value"), + output = Array("simple value") + ), + new TransformExample( + input1 = Array("with \"quotes\""), + output = Array("with \\\"quotes\\\"") + ), + new TransformExample( + input1 = Array("back\\slash"), + output = Array("back\\\\slash") + ), + new TransformExample( + input1 = Array("line1\nline2"), + output = Array("line1\\nline2") + ) +)) +case class EscapeLiteralTransformer() extends SimpleTransformer { + + override def evaluate(value: String): String = { + val sb = new StringBuilder(value.length) + var i = 0 + while (i < value.length) { + value.charAt(i) match { + case '\\' => sb.append("\\\\") + case '"' => sb.append("\\\"") + case '\'' => sb.append("\\'") + case '\n' => sb.append("\\n") + case '\r' => sb.append("\\r") + case '\t' => sb.append("\\t") + case c => sb.append(c) + } + i += 1 + } + sb.toString + } +} \ No newline at end of file diff --git a/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformer.scala b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformer.scala new file mode 100644 index 0000000000..21577e9620 --- /dev/null +++ b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformer.scala @@ -0,0 +1,55 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.annotations.{TransformExample, TransformExamples} +import org.silkframework.rule.input.SimpleTransformer +import org.silkframework.runtime.plugin.annotations.Plugin + +import scala.util.matching.Regex + +@Plugin( + id = "escape_multiline_literal", + categories = Array("SPARQL"), + label = "Escape SPARQL multiline literal", + description = "Escapes a value so it can be safely used inside a SPARQL triple-quoted string literal " + + "(`\"\"\"...\"\"\"` or `'''...'''`). 
Escapes backslashes and breaks any run of three or more " + + "consecutive single or double quotes. Individual quotes and newlines are preserved. " + + "The returned value does not include enclosing quotation marks." +) +@TransformExamples(Array( + new TransformExample( + input1 = Array("simple\nvalue"), + output = Array("simple\nvalue") + ), + new TransformExample( + input1 = Array("with \"quote\""), + output = Array("with \"quote\"") + ), + new TransformExample( + input1 = Array("back\\slash"), + output = Array("back\\\\slash") + ), + new TransformExample( + input1 = Array("triple \"\"\" quotes"), + output = Array("triple \\\"\\\"\\\" quotes") + ), + new TransformExample( + input1 = Array("triple ''' quotes"), + output = Array("triple \\'\\'\\' quotes") + ) +)) +case class EscapeMultilineLiteralTransformer() extends SimpleTransformer { + + override def evaluate(value: String): String = { + val withBackslashes = value.replace("\\", "\\\\") + val noTripleDq = EscapeMultilineLiteralTransformer.dqRun3.replaceAllIn(withBackslashes, + m => Regex.quoteReplacement("\\\"" * m.matched.length)) + val noTripleSq = EscapeMultilineLiteralTransformer.sqRun3.replaceAllIn(noTripleDq, + m => Regex.quoteReplacement("\\'" * m.matched.length)) + noTripleSq + } +} + +object EscapeMultilineLiteralTransformer { + private val dqRun3: Regex = "\"{3,}".r + private val sqRun3: Regex = "'{3,}".r +} \ No newline at end of file diff --git a/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformer.scala b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformer.scala new file mode 100644 index 0000000000..2557d8e294 --- /dev/null +++ b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformer.scala @@ -0,0 +1,45 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.annotations.{TransformExample, TransformExamples} +import 
org.silkframework.rule.input.SimpleTransformer +import org.silkframework.runtime.plugin.annotations.Plugin +import org.silkframework.runtime.validation.ValidationException + +import java.net.URI +import scala.util.Try + +@Plugin( + id = "validate_uri", + categories = Array("Validation", "SPARQL"), + label = "Validate URI", + description = "Validates that the input is a valid absolute IRI and returns it unchanged. " + + "Throws a validation error if the input is not a valid IRI. " +) +@TransformExamples(Array( + new TransformExample( + input1 = Array("http://example.org/entity1"), + output = Array("http://example.org/entity1") + ), + new TransformExample( + input1 = Array("urn:example:1"), + output = Array("urn:example:1") + ), + new TransformExample( + input1 = Array("not a uri"), + throwsException = classOf[ValidationException] + ), + new TransformExample( + input1 = Array(""), + throwsException = classOf[ValidationException] + ) +)) +case class ValidateUriTransformer() extends SimpleTransformer { + + override def evaluate(value: String): String = { + Try(new URI(value)) match { + case scala.util.Success(uri) if uri.isAbsolute => value + case _ => + throw new ValidationException(s"Value is not a valid absolute IRI: '$value'") + } + } +} \ No newline at end of file diff --git a/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformerTest.scala b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformerTest.scala new file mode 100644 index 0000000000..19c39f81cd --- /dev/null +++ b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformerTest.scala @@ -0,0 +1,5 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.test.TransformerTest + +class EscapeLiteralTransformerTest extends TransformerTest[EscapeLiteralTransformer] \ No newline at end of file diff --git 
a/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformerTest.scala b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformerTest.scala new file mode 100644 index 0000000000..3bde03cd73 --- /dev/null +++ b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformerTest.scala @@ -0,0 +1,5 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.test.TransformerTest + +class EscapeMultilineLiteralTransformerTest extends TransformerTest[EscapeMultilineLiteralTransformer] \ No newline at end of file diff --git a/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformerTest.scala b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformerTest.scala new file mode 100644 index 0000000000..719896a03f --- /dev/null +++ b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformerTest.scala @@ -0,0 +1,5 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.test.TransformerTest + +class ValidateUriTransformerTest extends TransformerTest[ValidateUriTransformer] \ No newline at end of file From ebfef063ee4e05d76e9aeb63be1857c2d34ca97c Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 22 Apr 2026 15:47:35 +0200 Subject: [PATCH 39/63] Update changelog and doc for added SPARQL transformers --- .../rdf/tasks/SparqlSelectCustomTask.md | 16 +++++++++++++--- .../rdf/tasks/SparqlUpdateCustomTask.md | 18 ++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index 
093ccab317..43008aa9a8 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -28,14 +28,24 @@ The select query supports [Jinja](https://jinja.palletsprojects.com/) templating For example, to query the named graph that is configured on the input dataset: ```sparql -SELECT * WHERE { GRAPH <{{ input.config.graph }}> { ?s ?p ?o } } +SELECT * WHERE { GRAPH <{{ input.config.graph | validate_uri }}> { ?s ?p ?o } } ``` Parameter and variable names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); bracket-subscript access is not supported. -Rendering helpers such as `| uri` or `| plainLiteral` filters are not yet implemented; values are inserted -verbatim and any quoting / URI brackets must be written explicitly in the template. +Values are inserted verbatim by default, so URI brackets (`<...>`) and quotation marks around literals must be +written in the template. The following filters are provided to render values safely: + +- `validate_uri`: validates that the value is a valid absolute IRI and returns it unchanged. Throws a validation + error otherwise. Wrap the output in `<...>` in the template. +- `escape_literal`: escapes backslashes, quotes, newlines, carriage returns and tabs so the value can be used + inside a short-form SPARQL string literal (`"..."` or `'...'`). No enclosing quotes are added. +- `escape_multiline_literal`: escapes backslashes and breaks any run of three or more consecutive single or double + quotes. Use for values that are wrapped in triple-quoted SPARQL literals (`"""..."""` or `'''...'''`). + +All transformer plugins are also available as Jinja filters under their plugin id (for example `lowerCase`, +`trim`, `urlEncode`). The output schema (i.e. 
the result variables) is derived from the query at configuration time by evaluating the template with default values, so the query must remain valid SPARQL regardless of the parameter values. diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md index 079c86e8ee..bb4ec98813 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md @@ -22,9 +22,9 @@ The `Simple` and `Velocity Engine` modes are deprecated. `{% %}` for control flow statements such as conditionals. ``` -DELETE DATA { <{{ input.entity.subject }}> rdfs:label "{{ input.entity.oldLabel }}" } ; +DELETE DATA { <{{ input.entity.subject | validate_uri }}> rdfs:label "{{ input.entity.oldLabel | escape_literal }}" } ; {% if input.entity.subject %} - INSERT DATA { <{{ input.entity.subject }}> rdfs:label "{{ input.entity.newLabel }}" } ; + INSERT DATA { <{{ input.entity.subject | validate_uri }}> rdfs:label "{{ input.entity.newLabel | escape_literal }}" } ; {% endif %} ``` @@ -39,8 +39,18 @@ The following variables are available: Entity property names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); bracket-subscript access such as `input.entity["urn:prop:label"]` is not supported. -Rendering helpers such as `| uri` or `| plainLiteral` filters are not yet implemented; values are inserted -verbatim and any quoting / URI brackets must be written explicitly in the template. +Values are inserted verbatim by default, so URI brackets (`<...>`) and quotation marks around literals must be +written in the template. 
The following filters are provided to render values safely: + +- `validate_uri`: validates that the value is a valid absolute IRI and returns it unchanged. Throws a validation + error otherwise. Wrap the output in `<...>` in the template. +- `escape_literal`: escapes backslashes, quotes, newlines, carriage returns and tabs so the value can be used + inside a short-form SPARQL string literal (`"..."` or `'...'`). No enclosing quotes are added. +- `escape_multiline_literal`: escapes backslashes and breaks any run of three or more consecutive single or double + quotes. Use for values that are wrapped in triple-quoted SPARQL literals (`"""..."""` or `'''...'''`). + +All transformer plugins are also available as Jinja filters under their plugin id (for example `lowerCase`, +`trim`, `urlEncode`). ### Example of the `Simple` mode (deprecated) From f86e9304e0bab89cec1d058b5f6ea58dd6eb3adf Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 22 Apr 2026 16:02:27 +0200 Subject: [PATCH 40/63] For Jinja SPARQL templates: Don't iterate the cross product of input entity values. Jinja works well with lists of values. 
--- .../executors/LocalSparqlSelectExecutor.scala | 2 +- .../executors/LocalSparqlUpdateExecutor.scala | 51 +---------- .../templating/SparqlJinjaTemplate.scala | 22 ++--- .../templating/SparqlLegacyTemplate.scala | 69 ++++++++++++++- .../rdf/tasks/templating/SparqlTemplate.scala | 18 ++-- ...arqlUpdateTemplatingEngineSimpleTest.scala | 14 +++- .../CrossProductIteratorTest.scala | 84 +++++++++---------- .../templating/SparqlTemplateJinjaTest.scala | 36 ++++++-- .../SparqlTemplateVelocityTest.scala | 13 ++- 9 files changed, 182 insertions(+), 127 deletions(-) rename silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/{ => tasks/templating}/CrossProductIteratorTest.scala (90%) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index 69ebe16902..8c4fb90569 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -56,7 +56,7 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT val sparqlEndpoint = inputTask.data.plugin.sparqlEndpoint val taskProperties = TaskProperties.create(Some(inputTask), outputTask, pluginContext) val templateVariables = pluginContext.templateVariables.all.variables - executeSelect(sparqlEndpoint, sparqlSelectTask.queryTemplate.generate(Map.empty, taskProperties, templateVariables), selectLimit, Some(sparqlSelectTask.sparqlTimeout)) + executeSelect(sparqlEndpoint, sparqlSelectTask.queryTemplate.generate(None, taskProperties, templateVariables).head, selectLimit, Some(sparqlSelectTask.sparqlTimeout)) } /** diff --git 
a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala index 479c008621..aa8f4fcd41 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala @@ -37,9 +37,8 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT checkInputSchema(expectedProperties, inputProperties.toSet) for (entity <- input.entities; values = expectedSchema.typedPaths.map(tp => entity.valueOfPath(tp.toUntypedPath)) if values.forall(_.nonEmpty)) { - val it = CrossProductIterator(values, expectedProperties) - while (it.hasNext) { - val query = updateTask.compiledTemplate.generate(it.next(), taskProperties, templateVariables) + val projected = Entity(entity.uri, values, expectedSchema) + for (query <- updateTask.compiledTemplate.generate(Some(projected), taskProperties, templateVariables)) { batchEmitter.update(query) } } @@ -78,8 +77,7 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT (implicit pluginContext: PluginContext): Unit = { val taskProperties = TaskProperties.create(inputTask = inputTask, outputTask = outputTask, pluginContext = pluginContext) val templateVariables = pluginContext.templateVariables.all.variables - val query = updateTask.compiledTemplate.generate(Map.empty, taskProperties, templateVariables) - batchEmitter.update(query) + updateTask.compiledTemplate.generate(None, taskProperties, templateVariables).foreach(batchEmitter.update) } // Check that expected schema is subset of input schema @@ -110,49 +108,6 @@ case class SparqlUpdateExecutionReportUpdater(task: Task[TaskSpec], override def minEntitiesBetweenUpdates: 
Int = 1 } -case class CrossProductIterator(values: IndexedSeq[Seq[String]], - properties: IndexedSeq[String]) extends Iterator[Map[String, String]] { - assert(values.nonEmpty) - private val sizes = values.map(_.size).toArray - // Holds the current index combination - private val indexes = new Array[Int](values.size) - private val firstNonEmptyIdx = sizes.zipWithIndex.filter(_._1 > 0).map(_._2).headOption.getOrElse(-1) // -1 if all are empty - private val lastIndex = values.size - 1 - private var first: Boolean = true // This makes sure that at least one assignment is always generated - - override def hasNext: Boolean = first || firstNonEmptyIdx > -1 && (indexes(firstNonEmptyIdx) < sizes(firstNonEmptyIdx)) - - override def next(): Map[String, String] = { - if(!hasNext) { - throw new IllegalStateException("Iterator is fully consumed and has no more values!") - } - val nextAssignment = indexes.zipWithIndex.collect { - case (valueIdx, propertyIndex) if sizes(propertyIndex) > 0 => properties(propertyIndex) -> values(propertyIndex)(valueIdx) - }.toMap - setNextIndexCombinations() - first = false - nextAssignment - } - - private def setNextIndexCombinations(): Unit = { - var idx = lastIndex - while(idx > -1) { - indexes(idx) += 1 - if(indexes(idx) >= sizes(idx) && idx != firstNonEmptyIdx) { // Do not reset the first index, because of hasNext check - indexes(idx) = 0 - idx -= 1 - } else if(idx > 0) { - for(i <- (idx + 1) to lastIndex) { // null all index values after this index - indexes(i) = 0 - } - idx = -1 - } else { - idx = -1 - } - } - } -} - case class BatchSparqlUpdateEmitter[U](f: String => U, batchSize: Int, reportUpdater: SparqlUpdateExecutionReportUpdater) { private var sparqlUpdateQueries = new StringBuffer() private var queryCount = 0 diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala 
b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala index 7e481e362b..5cfae7d7be 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala @@ -1,9 +1,9 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating -import org.silkframework.entity.EntitySchema +import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable -import org.silkframework.runtime.templating.{CompiledTemplate, TemplateVariableName, TemplateVariableValue} +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateVariableConversions, TemplateVariableName, TemplateVariableValue} import java.io.StringWriter @@ -18,21 +18,19 @@ import java.io.StringWriter * {{ project. }} -- project-scoped template variable * {{ global. }} -- global template variable * - * Entity property names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); bracket-subscript - * access is not supported. SPARQL-rendering filters (`| uri`, `| plainLiteral`, `| rawUnsafe`, `| exists`) - * are not implemented in this iteration and are a follow-up. 
+ * Entity property names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`) */ class SparqlJinjaTemplate(template: CompiledTemplate) extends SparqlTemplate { import SparqlJinjaTemplate._ - override def generate(placeholderAssignments: Map[String, String], + override def generate(entity: Option[Entity], taskProperties: TaskProperties, - templateVariables: Seq[TemplateVariableValue] = Seq.empty): String = { - val values = buildValues(placeholderAssignments, taskProperties, templateVariables) + templateVariables: Seq[TemplateVariableValue] = Seq.empty): Iterable[String] = { + val values = buildValues(entity, taskProperties, templateVariables) val writer = new StringWriter() template.evaluate(values, writer) - writer.toString + Seq(writer.toString) } override def generateWithDefaults(): String = { @@ -63,15 +61,13 @@ class SparqlJinjaTemplate(template: CompiledTemplate) extends SparqlTemplate { entityPropertyNames.isEmpty } - private def buildValues(placeholderAssignments: Map[String, String], + private def buildValues(entity: Option[Entity], taskProperties: TaskProperties, templateVariables: Seq[TemplateVariableValue]): Seq[TemplateVariableValue] = { val inputConfig = taskProperties.inputTask.map { case (k, v) => new TemplateVariableValue(k, INPUT_CONFIG_SCOPE, Seq(v)) } - val inputEntity = placeholderAssignments.map { case (k, v) => - new TemplateVariableValue(k, INPUT_ENTITY_SCOPE, Seq(v)) - } + val inputEntity = entity.toSeq.flatMap(e => TemplateVariableConversions.fromEntity(e, INPUT_ENTITY_SCOPE)) val outputConfig = taskProperties.outputTask.map { case (k, v) => new TemplateVariableValue(k, OUTPUT_CONFIG_SCOPE, Seq(v)) } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala index e49015e2f6..9c46f618cb 100644 --- 
a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala @@ -1,7 +1,7 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating import org.apache.jena.update.UpdateFactory -import org.silkframework.entity.EntitySchema +import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlLegacyTemplate._ @@ -20,9 +20,20 @@ import scala.util.{Failure, Success, Try} */ class SparqlLegacyTemplate(template: CompiledTemplate) extends SparqlTemplate { - override def generate(placeholderAssignments: Map[String, String], + override def generate(entity: Option[Entity], taskProperties: TaskProperties, - templateVariables: Seq[TemplateVariableValue] = Seq.empty): String = { + templateVariables: Seq[TemplateVariableValue] = Seq.empty): Iterable[String] = { + entity match { + case Some(e) if e.values.nonEmpty => + val properties = e.schema.typedPaths.map(_.normalizedSerialization) + CrossProductIterator(e.values, properties).map(renderOnce(_, taskProperties)).toSeq + case _ => + Seq(renderOnce(Map.empty, taskProperties)) + } + } + + private def renderOnce(placeholderAssignments: Map[String, String], + taskProperties: TaskProperties): String = { val values = scala.collection.mutable.LinkedHashMap[String, AnyRef]() // Flat entity values (used by simple template engine) placeholderAssignments.foreach { case (k, v) => values(k) = v } @@ -42,7 +53,7 @@ class SparqlLegacyTemplate(template: CompiledTemplate) extends SparqlTemplate { val inputPropVars = taskPropertyVariableNames(Seq(INPUT_PROPERTIES_VAR_NAME)).map(_ -> genericUri).toMap val outputPropVars = 
taskPropertyVariableNames(Seq(OUTPUT_PROPERTIES_VAR_NAME)).map(_ -> genericUri).toMap val taskProps = TaskProperties(inputPropVars, outputPropVars) - Try(generate(assignments, taskProps)) match { + Try(renderOnce(assignments, taskProps)) match { case Failure(exception) => throw new ValidationException( "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) @@ -177,4 +188,54 @@ object SparqlLegacyTemplate { case class OutputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { override val templateVarName: String = OUTPUT_PROPERTIES_VAR_NAME } + + /** + * Iterates over the cross-product of per-property value lists, producing one `Map[String, String]` + * per combination. Used by the legacy template engine, which renders one query per combination. + * + * Preserves the existing behavior of emitting at least one (empty) assignment, even if all value + * lists are empty. + */ + private[templating] case class CrossProductIterator(values: IndexedSeq[Seq[String]], + properties: IndexedSeq[String]) extends Iterator[Map[String, String]] { + assert(values.nonEmpty) + private val sizes = values.map(_.size).toArray + // Holds the current index combination + private val indexes = new Array[Int](values.size) + private val firstNonEmptyIdx = sizes.zipWithIndex.filter(_._1 > 0).map(_._2).headOption.getOrElse(-1) // -1 if all are empty + private val lastIndex = values.size - 1 + private var first: Boolean = true // This makes sure that at least one assignment is always generated + + override def hasNext: Boolean = first || firstNonEmptyIdx > -1 && (indexes(firstNonEmptyIdx) < sizes(firstNonEmptyIdx)) + + override def next(): Map[String, String] = { + if (!hasNext) { + throw new IllegalStateException("Iterator is fully consumed and has no more values!") + } + val nextAssignment = indexes.zipWithIndex.collect { + case (valueIdx, propertyIndex) if sizes(propertyIndex) > 0 => 
properties(propertyIndex) -> values(propertyIndex)(valueIdx) + }.toMap + setNextIndexCombinations() + first = false + nextAssignment + } + + private def setNextIndexCombinations(): Unit = { + var idx = lastIndex + while (idx > -1) { + indexes(idx) += 1 + if (indexes(idx) >= sizes(idx) && idx != firstNonEmptyIdx) { // Do not reset the first index, because of hasNext check + indexes(idx) = 0 + idx -= 1 + } else if (idx > 0) { + for (i <- (idx + 1) to lastIndex) { // null all index values after this index + indexes(i) = 0 + } + idx = -1 + } else { + idx = -1 + } + } + } + } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index e2456ae5b6..b457d43dcd 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -1,7 +1,7 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating import org.silkframework.config.{Prefixes, Task, TaskSpec} -import org.silkframework.entity.EntitySchema +import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.runtime.plugin.PluginContext import org.silkframework.runtime.templating.{TemplateEngines, TemplateVariableValue} @@ -21,14 +21,18 @@ trait SparqlTemplate { /** * Renders the template. * - * @param placeholderAssignments Values from the current input entity, keyed by the entity path. - * @param taskProperties Parameter values of the connected input and output tasks. - * @param templateVariables Project and global template variables (scoped as `Seq("project")` / `Seq("global")`). - * Only used by the Jinja implementation; the legacy implementation ignores them. 
+ * @param entity The current input entity, or `None` for static templates / Select queries. + * The Jinja implementation exposes each entity property as a list of values + * under `input.entity.*`. The legacy implementation iterates over the + * cross-product of property values and emits one query per combination. + * @param taskProperties Parameter values of the connected input and output tasks. + * @param templateVariables Project and global template variables (scoped as `Seq("project")` / `Seq("global")`). + * Only used by the Jinja implementation; the legacy implementation ignores them. + * @return One rendered query for Jinja, or one query per cross-product combination for the legacy engine. */ - def generate(placeholderAssignments: Map[String, String], + def generate(entity: Option[Entity], taskProperties: TaskProperties, - templateVariables: Seq[TemplateVariableValue] = Seq.empty): String + templateVariables: Seq[TemplateVariableValue] = Seq.empty): Iterable[String] /** Renders the template with example values for every variable. Used to derive schemas and validate queries. 
*/ def generateWithDefaults(): String diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala index a1db3d0b8b..9997618aed 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala @@ -1,6 +1,8 @@ package org.silkframework.plugins.dataset.rdf +import org.silkframework.entity.paths.UntypedPath +import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.plugins.dataset.rdf.tasks._ import org.silkframework.plugins.dataset.rdf.tasks.templating._ import org.silkframework.runtime.validation.ValidationException @@ -69,18 +71,26 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { } it should "generate the correct SPARQL Update query from the template" in { - SparqlUpdateCustomTask(sparqlUpdateTemplate, templatingMode = SparqlSimpleTemplateEngine.id).compiledTemplate.generate(Map( + val bindings = Map( "PROP_FROM_ENTITY_SCHEMA1" -> "urn:some:uri", "PROP_FROM_ENTITY_SCHEMA2" -> "the old label", "PROP_FROM_ENTITY_SCHEMA3" -> """The new |label with some "'weird characters""".stripMargin - ), TaskProperties(Map.empty, Map.empty)) mustBe + ) + SparqlUpdateCustomTask(sparqlUpdateTemplate, templatingMode = SparqlSimpleTemplateEngine.id) + .compiledTemplate.generate(Some(entityFromMap(bindings)), TaskProperties(Map.empty, Map.empty)).head mustBe """PREFIX rdf: |DELETE DATA { rdf:label "the old label" } ; | INSERT DATA { rdf:label "The new\nlabel with some \"'weird characters" } ;""".stripMargin } + private def entityFromMap(values: Map[String, String]): Entity = { + val entries = values.toIndexedSeq + val 
schema = EntitySchema("", entries.map { case (k, _) => UntypedPath(k).asUntypedValueType }) + Entity("urn:test", entries.map { case (_, v) => Seq(v) }, schema) + } + def parse(sparqlUpdateTemplate: String, batchSize: Int = 2): Seq[SparqlUpdateTemplatePart] = { val compiled = SparqlSimpleTemplateEngine().compile(sparqlUpdateTemplate) new SparqlLegacyTemplate(compiled).validateUpdateQuery(batchSize) diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/CrossProductIteratorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/CrossProductIteratorTest.scala similarity index 90% rename from silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/CrossProductIteratorTest.scala rename to silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/CrossProductIteratorTest.scala index e1d1579bec..39bdf7ee2b 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/CrossProductIteratorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/CrossProductIteratorTest.scala @@ -1,43 +1,43 @@ -package org.silkframework.plugins.dataset.rdf - -import org.silkframework.plugins.dataset.rdf.executors.CrossProductIterator +package org.silkframework.plugins.dataset.rdf.tasks.templating + import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.must.Matchers - -class CrossProductIteratorTest extends AnyFlatSpec with Matchers { - behavior of "Cross Product Matcher" - - it should "calculate the cross product" in { - val it = CrossProductIterator(IndexedSeq(Seq("A", "B"), Seq("a"), Seq("1", "2", "3")), IndexedSeq("prop1", "prop2", "prop3")) - it.toSeq mustBe Seq( - Map("prop1" -> "A", "prop2" -> "a", "prop3" -> "1"), - Map("prop1" -> "A", "prop2" -> "a", "prop3" -> "2"), - Map("prop1" -> "A", "prop2" -> "a", 
"prop3" -> "3"), - Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "1"), - Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "2"), - Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "3") - ) - } - - it should "leave out assignments with zero values" in { - val it = CrossProductIterator(IndexedSeq(Seq("A", "B"), Seq(), Seq("1", "2", "3")), IndexedSeq("prop1", "prop2", "prop3")) - it.toSeq mustBe Seq( - Map("prop1" -> "A", "prop3" -> "1"), - Map("prop1" -> "A", "prop3" -> "2"), - Map("prop1" -> "A", "prop3" -> "3"), - Map("prop1" -> "B", "prop3" -> "1"), - Map("prop1" -> "B", "prop3" -> "2"), - Map("prop1" -> "B", "prop3" -> "3") - ) - val it2 = CrossProductIterator(IndexedSeq(Seq(), Seq("a", "b"), Seq()), IndexedSeq("prop1", "prop2", "prop3")) - it2.toSeq mustBe Seq( - Map("prop2" -> "a"), - Map("prop2" -> "b") - ) - } - - it should "produce one empty assignment when all inputs are empty" in { - val it = CrossProductIterator(IndexedSeq(Seq(), Seq(), Seq()), IndexedSeq("prop1", "prop2", "prop3")) - it.toSeq mustBe Seq(Map()) - } -} +import org.scalatest.matchers.must.Matchers +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlLegacyTemplate.CrossProductIterator + +class CrossProductIteratorTest extends AnyFlatSpec with Matchers { + behavior of "Cross Product Matcher" + + it should "calculate the cross product" in { + val it = CrossProductIterator(IndexedSeq(Seq("A", "B"), Seq("a"), Seq("1", "2", "3")), IndexedSeq("prop1", "prop2", "prop3")) + it.toSeq mustBe Seq( + Map("prop1" -> "A", "prop2" -> "a", "prop3" -> "1"), + Map("prop1" -> "A", "prop2" -> "a", "prop3" -> "2"), + Map("prop1" -> "A", "prop2" -> "a", "prop3" -> "3"), + Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "1"), + Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "2"), + Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "3") + ) + } + + it should "leave out assignments with zero values" in { + val it = CrossProductIterator(IndexedSeq(Seq("A", "B"), Seq(), Seq("1", "2", "3")), 
IndexedSeq("prop1", "prop2", "prop3")) + it.toSeq mustBe Seq( + Map("prop1" -> "A", "prop3" -> "1"), + Map("prop1" -> "A", "prop3" -> "2"), + Map("prop1" -> "A", "prop3" -> "3"), + Map("prop1" -> "B", "prop3" -> "1"), + Map("prop1" -> "B", "prop3" -> "2"), + Map("prop1" -> "B", "prop3" -> "3") + ) + val it2 = CrossProductIterator(IndexedSeq(Seq(), Seq("a", "b"), Seq()), IndexedSeq("prop1", "prop2", "prop3")) + it2.toSeq mustBe Seq( + Map("prop2" -> "a"), + Map("prop2" -> "b") + ) + } + + it should "produce one empty assignment when all inputs are empty" in { + val it = CrossProductIterator(IndexedSeq(Seq(), Seq(), Seq()), IndexedSeq("prop1", "prop2", "prop3")) + it.toSeq mustBe Seq(Map()) + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala index 1a25edda46..5e1881bfff 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala @@ -2,6 +2,8 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers +import org.silkframework.entity.paths.UntypedPath +import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.plugins.templating.jinja.JinjaTemplateEngine import org.silkframework.runtime.templating.TemplateVariableValue import org.silkframework.runtime.templating.exceptions.{TemplateEvaluationException, UnboundVariablesException} @@ -102,14 +104,14 @@ class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { ) val rendered = template.generate( - placeholderAssignments = Map( + entity = Some(entityFromMap(Map( 
"subject" -> "urn:entity:1", "label" -> """O'Reilly & "friends"""", "comment" -> "has ''' triple quotes" - ), + ))), taskProperties = taskProps, templateVariables = projectAndGlobal - ) + ).head rendered must include("WITH ") rendered must include("") @@ -120,26 +122,44 @@ class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { // With an empty `comment`, the {% if %} branch is skipped. val withoutComment = template.generate( - placeholderAssignments = Map("subject" -> "urn:entity:1", "label" -> "plain", "comment" -> ""), + entity = Some(entityFromMap(Map("subject" -> "urn:entity:1", "label" -> "plain", "comment" -> ""))), taskProperties = taskProps, templateVariables = projectAndGlobal - ) + ).head withoutComment must not include "rdfs:comment" // An invalid IRI piped through validate_uri surfaces as a TemplateEvaluationException. intercept[TemplateEvaluationException] { template.generate( - placeholderAssignments = Map("subject" -> "not a uri", "label" -> "plain", "comment" -> ""), + entity = Some(entityFromMap(Map("subject" -> "not a uri", "label" -> "plain", "comment" -> ""))), taskProperties = taskProps, templateVariables = projectAndGlobal - ) + ).head } } + it should "expose multi-valued entity properties as lists iterable in the Jinja template" in { + val template = SparqlTemplate.create(JinjaTemplateEngine.id, + """{% for s in input.entity.subject %}INSERT DATA { <{{ s }}> "x" } ; + |{% endfor %}""".stripMargin) + val schema = EntitySchema("", IndexedSeq(UntypedPath("subject").asUntypedValueType)) + val entity = Entity("urn:e:1", IndexedSeq(Seq("urn:a:1", "urn:a:2")), schema) + val rendered = template.generate(Some(entity), TaskProperties(Map.empty, Map.empty)).head + rendered must include("") + rendered must include("") + } + private def generate(template: String, assignments: Map[String, String] = Map.empty, taskProps: TaskProperties = TaskProperties(Map.empty, Map.empty), templateVariables: Seq[TemplateVariableValue] = Seq.empty): String = { - 
SparqlTemplate.create(JinjaTemplateEngine.id, template).generate(assignments, taskProps, templateVariables) + val entity = if (assignments.isEmpty) None else Some(entityFromMap(assignments)) + SparqlTemplate.create(JinjaTemplateEngine.id, template).generate(entity, taskProps, templateVariables).head + } + + private def entityFromMap(values: Map[String, String]): Entity = { + val entries = values.toIndexedSeq + val schema = EntitySchema("", entries.map { case (k, _) => UntypedPath(k).asUntypedValueType }) + Entity("urn:test", entries.map { case (_, v) => Seq(v) }, schema) } } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala index 0f4ba55c4a..eddec735dc 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala @@ -1,6 +1,8 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating import org.apache.jena.vocabulary.XSD +import org.silkframework.entity.paths.UntypedPath +import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.plugins.templating.velocity.VelocityTemplateEngine import org.silkframework.runtime.validation.ValidationException import org.scalatest.flatspec.AnyFlatSpec @@ -81,7 +83,7 @@ class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { |}""".stripMargin val template = new SparqlLegacyTemplate(VelocityTemplateEngine().compile(stringTemplate)) for(i <- 1 to 10) { - val rendered = template.generate(Map("uriProp" -> s"http://entity$i", "stringProp" -> s"some label $i"), TaskProperties(Map.empty, Map.empty)) + val rendered = 
template.generate(Some(entityFromMap(Map("uriProp" -> s"http://entity$i", "stringProp" -> s"some label $i"))), TaskProperties(Map.empty, Map.empty)).head rendered mustBe s"""SELECT * WHERE { | rdfs:label "some label $i" @@ -113,7 +115,14 @@ class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { } private def generate(templateString: String, bindings: Map[String, String]): String = { - new SparqlLegacyTemplate(VelocityTemplateEngine().compile(templateString)).generate(bindings, TaskProperties(Map.empty, Map.empty)) + val entity = if (bindings.isEmpty) None else Some(entityFromMap(bindings)) + new SparqlLegacyTemplate(VelocityTemplateEngine().compile(templateString)).generate(entity, TaskProperties(Map.empty, Map.empty)).head + } + + private def entityFromMap(values: Map[String, String]): Entity = { + val entries = values.toIndexedSeq + val schema = EntitySchema("", entries.map { case (k, _) => UntypedPath(k).asUntypedValueType }) + Entity("urn:test", entries.map { case (_, v) => Seq(v) }, schema) } def validate(template: String, batchSize: Int = 2): Unit = { From 77e643c38c3b0cee18bcadcdd3e8e47c7443244d Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 22 Apr 2026 16:10:40 +0200 Subject: [PATCH 41/63] Add validation of Jinja templates --- .../templating/SparqlJinjaTemplate.scala | 19 ++++++++----- .../templating/SparqlLegacyTemplate.scala | 22 +++------------ .../rdf/tasks/templating/SparqlTemplate.scala | 27 +++++++++++++++++++ .../templating/SparqlTemplateJinjaTest.scala | 23 ++++++++++++++++ 4 files changed, 66 insertions(+), 25 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala index 5cfae7d7be..6963605e97 100644 --- 
a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala @@ -4,8 +4,10 @@ import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable import org.silkframework.runtime.templating.{CompiledTemplate, TemplateVariableConversions, TemplateVariableName, TemplateVariableValue} +import org.silkframework.runtime.validation.ValidationException import java.io.StringWriter +import scala.util.{Failure, Success, Try} /** * SPARQL template implementation for the Jinja engine. @@ -37,15 +39,20 @@ class SparqlJinjaTemplate(template: CompiledTemplate) extends SparqlTemplate { // Seed every referenced variable with a URI-like default so that QueryFactory can parse the result. val genericUri = "urn:generic:1" val defaults = referencedVariables.distinct.map(v => new TemplateVariableValue(v.name, v.scope, Seq(genericUri))) - val writer = new StringWriter() - template.evaluate(defaults, writer) - writer.toString + Try { + val writer = new StringWriter() + template.evaluate(defaults, writer) + writer.toString + } match { + case Success(query) => query + case Failure(exception) => + throw new ValidationException( + "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) + } } override def validateUpdateQuery(batchSize: Int): Unit = { - // Without SPARQL rendering filters (| uri, | plainLiteral, ...), every entity reference inserts the - // raw string value with no escaping. Generating meaningful example queries for parse validation is - // therefore not possible in the current iteration. Skip validation until the filter follow-up lands. 
+ SparqlTemplate.validateParseability(generateWithDefaults(), batchSize) } override def inputSchema: EntitySchema = { diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala index 9c46f618cb..9dff1b64a7 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala @@ -1,6 +1,5 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating -import org.apache.jena.update.UpdateFactory import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.entity.paths.UntypedPath import org.silkframework.execution.local.EmptyEntityTable @@ -62,24 +61,9 @@ class SparqlLegacyTemplate(template: CompiledTemplate) extends SparqlTemplate { } override def validateUpdateQuery(batchSize: Int): Unit = { - if (usesRawUnsafe) { - // We cannot generate meaningful example values for the template if $row.rawUnsafe() is used, because it could generate arbitrary SPARQL syntax. - } else { - // Generate example input assignments - val sparqlQuery = generateWithDefaults() - Try(UpdateFactory.create(sparqlQuery)).failed.toOption.foreach { parseError => - throw new ValidationException( - "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + - parseError.getMessage + ", example query: " + sparqlQuery) - } - if (batchSize > 1) { - val batchSparql = sparqlQuery + "\n" + sparqlQuery - Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => - throw new ValidationException( - "The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. 
Error message: " + - parseError.getMessage + ", example batch query: " + batchSparql) - } - } + if (!usesRawUnsafe) { + // Skipped for rawUnsafe templates: they can generate arbitrary SPARQL syntax so example-query validation is unreliable. + SparqlTemplate.validateParseability(generateWithDefaults(), batchSize) } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index b457d43dcd..161afcbcef 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -1,9 +1,13 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating +import org.apache.jena.update.UpdateFactory import org.silkframework.config.{Prefixes, Task, TaskSpec} import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.runtime.plugin.PluginContext import org.silkframework.runtime.templating.{TemplateEngines, TemplateVariableValue} +import org.silkframework.runtime.validation.ValidationException + +import scala.util.Try /** * Compiled SPARQL template. Encapsulates rendering a SPARQL query from a template and the associated @@ -65,6 +69,29 @@ object SparqlTemplate { new SparqlLegacyTemplate(compiled) } } + + /** + * Verifies that a rendered example query parses as SPARQL Update, and — when batchSize > 1 — that two + * consecutive copies also parse (so batching in [[org.silkframework.plugins.dataset.rdf.executors.BatchSparqlUpdateEmitter]] + * produces valid queries). + * + * Shared between [[SparqlJinjaTemplate]] and [[SparqlLegacyTemplate]]. 
+ */ + private[templating] def validateParseability(query: String, batchSize: Int): Unit = { + Try(UpdateFactory.create(query)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + + parseError.getMessage + ", example query: " + query) + } + if (batchSize > 1) { + val batchSparql = query + "\n" + query + Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + + parseError.getMessage + ", example batch query: " + batchSparql) + } + } + } } /** Makes properties of the input and output task of a SPARQL operator execution available to the template. */ diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala index 5e1881bfff..9304c9bfc4 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala @@ -7,6 +7,7 @@ import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.plugins.templating.jinja.JinjaTemplateEngine import org.silkframework.runtime.templating.TemplateVariableValue import org.silkframework.runtime.templating.exceptions.{TemplateEvaluationException, UnboundVariablesException} +import org.silkframework.runtime.validation.ValidationException class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { @@ -138,6 +139,28 @@ class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { } } + it should "validate Jinja 
SPARQL Update templates by parsing an example query" in { + val validTemplate = SparqlTemplate.create(JinjaTemplateEngine.id, + """INSERT DATA { <{{ input.entity.subject | validate_uri }}> "{{ input.entity.label | escape_literal }}" } ;""") + // Well-formed template with batch size > 1 must not throw. + validTemplate.validateUpdateQuery(batchSize = 2) + + // Template that forgets to wrap the URI variable with `<...>` produces unparseable SPARQL. + val unwrappedUri = SparqlTemplate.create(JinjaTemplateEngine.id, + """INSERT DATA { {{ input.entity.subject }} "x" } ;""") + intercept[ValidationException] { + unwrappedUri.validateUpdateQuery(batchSize = 1) + } + + // Template without a trailing `;` is fine for batchSize = 1 but fails for batchSize > 1. + val missingSemicolon = SparqlTemplate.create(JinjaTemplateEngine.id, + """INSERT DATA { <{{ input.entity.subject | validate_uri }}> "x" }""") + missingSemicolon.validateUpdateQuery(batchSize = 1) + intercept[ValidationException] { + missingSemicolon.validateUpdateQuery(batchSize = 2) + } + } + it should "expose multi-valued entity properties as lists iterable in the Jinja template" in { val template = SparqlTemplate.create(JinjaTemplateEngine.id, """{% for s in input.entity.subject %}INSERT DATA { <{{ s }}> "x" } ; From bc3342005bf6483600b8fab4954d2aa4e4ba61d8 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 22 Apr 2026 16:30:52 +0200 Subject: [PATCH 42/63] Introduce default RDF dataset --- .../dataset/rdf/DefaultRdfDataset.scala | 28 +++++++++++++ .../executors/LocalSparqlSelectExecutor.scala | 42 +++++++++++++------ .../rdf/tasks/SparqlSelectCustomTask.scala | 12 +++++- 3 files changed, 68 insertions(+), 14 deletions(-) create mode 100644 silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/DefaultRdfDataset.scala diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/DefaultRdfDataset.scala 
b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/DefaultRdfDataset.scala new file mode 100644 index 0000000000..2288cc400f --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/DefaultRdfDataset.scala @@ -0,0 +1,28 @@ +package org.silkframework.plugins.dataset.rdf + +import org.silkframework.dataset.Dataset +import org.silkframework.dataset.rdf.RdfDataset +import org.silkframework.runtime.plugin.{PluginContext, PluginRegistry} + +/** + * Resolves the RDF dataset configured under `dataset.defaultRdf`. + * + * Used by SPARQL query tasks that want to submit their query directly to a configured dataset + * rather than to an RDF dataset wired up via an input port. + */ +object DefaultRdfDataset { + + private val configKey = "dataset.defaultRdf" + + def resolve()(implicit pluginContext: PluginContext): RdfDataset = { + PluginRegistry.createFromConfigOption[Dataset](configKey) match { + case Some(rdf: RdfDataset) => + rdf + case Some(other) => + throw new IllegalStateException( + s"Plugin configured at '$configKey' is not an RdfDataset: ${other.getClass.getSimpleName}") + case None => + throw new IllegalStateException(s"No default RDF dataset configured at '$configKey'.") + } + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index 8c4fb90569..2266a0e6bd 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -7,6 +7,7 @@ import org.silkframework.entity.Entity import org.silkframework.execution.local.{GenericEntityTable, LocalEntities, LocalExecution, 
LocalExecutor} import org.silkframework.execution.typed.SparqlEndpointEntitySchema import org.silkframework.execution.{ExecutionReport, ExecutionReportUpdater, ExecutorOutput, TaskException} +import org.silkframework.plugins.dataset.rdf.DefaultRdfDataset import org.silkframework.plugins.dataset.rdf.tasks.SparqlSelectCustomTask import org.silkframework.plugins.dataset.rdf.tasks.templating.TaskProperties import org.silkframework.runtime.activity.{ActivityContext, UserContext} @@ -24,12 +25,16 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT context: ActivityContext[ExecutionReport]) (implicit pluginContext: PluginContext): Option[LocalEntities] = { val taskData = task.data + implicit val executionReportUpdater: SparqlSelectExecutionReportUpdater = SparqlSelectExecutionReportUpdater(task, context) inputs match { case Seq(SparqlEndpointEntitySchema(sparql)) => - implicit val executionReportUpdater: SparqlSelectExecutionReportUpdater = SparqlSelectExecutionReportUpdater(task, context) val entities = executeOnSparqlEndpoint(taskData, sparql.task, output.task, executionReportUpdater = Some(executionReportUpdater)) Some(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task)) + case Seq() if taskData.useDefaultDataset => + val rdfDataset = DefaultRdfDataset.resolve() + val entities = executeOnDefaultDataset(taskData, rdfDataset, output.task, executionReportUpdater = Some(executionReportUpdater)) + Some(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task)) case _ => throw TaskException("SPARQL select executor did not receive a SPARQL endpoint as requested!") } @@ -41,22 +46,33 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT limit: Int = Integer.MAX_VALUE, executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]) (implicit pluginContext: PluginContext): CloseableIterator[Entity] = { - val selectLimit = 
math.min(sparqlSelectTask.intLimit.getOrElse(Integer.MAX_VALUE), limit) - val results = select(sparqlSelectTask, inputTask, outputTask, selectLimit) - val vars: IndexedSeq[String] = getSparqlVars(sparqlSelectTask) - createEntities(sparqlSelectTask, results, vars, executionReportUpdater) + runSelect(sparqlSelectTask, inputTask.data.plugin.sparqlEndpoint, Some(inputTask), outputTask, limit, executionReportUpdater) } - private def select(sparqlSelectTask: SparqlSelectCustomTask, - inputTask: Task[_ <: DatasetSpec[RdfDataset]], - outputTask: Option[Task[_ <: TaskSpec]], - selectLimit: Int) - (implicit pluginContext: PluginContext): SparqlResults = { + private def executeOnDefaultDataset(sparqlSelectTask: SparqlSelectCustomTask, + rdfDataset: RdfDataset, + outputTask: Option[Task[_ <: TaskSpec]], + limit: Int = Integer.MAX_VALUE, + executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]) + (implicit pluginContext: PluginContext): CloseableIterator[Entity] = { + runSelect(sparqlSelectTask, rdfDataset.sparqlEndpoint, None, outputTask, limit, executionReportUpdater) + } + + private def runSelect(sparqlSelectTask: SparqlSelectCustomTask, + sparqlEndpoint: SparqlEndpoint, + inputTask: Option[Task[_ <: TaskSpec]], + outputTask: Option[Task[_ <: TaskSpec]], + limit: Int, + executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]) + (implicit pluginContext: PluginContext): CloseableIterator[Entity] = { implicit val user: UserContext = pluginContext.user - val sparqlEndpoint = inputTask.data.plugin.sparqlEndpoint - val taskProperties = TaskProperties.create(Some(inputTask), outputTask, pluginContext) + val selectLimit = math.min(sparqlSelectTask.intLimit.getOrElse(Integer.MAX_VALUE), limit) + val taskProperties = TaskProperties.create(inputTask, outputTask, pluginContext) val templateVariables = pluginContext.templateVariables.all.variables - executeSelect(sparqlEndpoint, sparqlSelectTask.queryTemplate.generate(None, taskProperties, templateVariables).head, 
selectLimit, Some(sparqlSelectTask.sparqlTimeout)) + val query = sparqlSelectTask.queryTemplate.generate(None, taskProperties, templateVariables).head + val results = executeSelect(sparqlEndpoint, query, selectLimit, Some(sparqlSelectTask.sparqlTimeout)) + val vars: IndexedSeq[String] = getSparqlVars(sparqlSelectTask) + createEntities(sparqlSelectTask, results, vars, executionReportUpdater) } /** diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index fba6cb66f9..b5fe4c3443 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -47,6 +47,12 @@ case class SparqlSelectCustomTask( autoCompletionProvider = classOf[SparqlEndpointDatasetAutoCompletionProvider], autoCompleteValueWithLabels = true, allowOnlyAutoCompletedValues = true) optionalInputDataset: SparqlEndpointDatasetParameter = SparqlEndpointDatasetParameter(""), + @Param( + label = "Use default RDF dataset", + value = "If enabled, the task no longer exposes a SPARQL endpoint input port. Instead the SELECT query is submitted" + + " directly to the RDF dataset." + ) + useDefaultDataset: Boolean = false, @Param( label = "SPARQL query timeout (ms)", value = "SPARQL query timeout (select/update) in milliseconds. A value of zero means that there is no timeout set explicitly." 
+ @@ -68,7 +74,11 @@ case class SparqlSelectCustomTask( val queryTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, selectQuery.str) override def inputPorts: InputPorts = { - FixedNumberOfInputs(Seq(FixedSchemaPort(SparqlEndpointEntitySchema.schema))) + if (useDefaultDataset) { + InputPorts.NoInputPorts + } else { + FixedNumberOfInputs(Seq(FixedSchemaPort(SparqlEndpointEntitySchema.schema))) + } } override def outputPort: Option[Port] = { From 7b825fb40caf001d79a05be08a50013dd50c97f4 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 22 Apr 2026 16:45:53 +0200 Subject: [PATCH 43/63] SparqlSelectCustomTask: Support reading entities. --- .../executors/LocalSparqlSelectExecutor.scala | 46 +++++++++++++++++-- .../rdf/tasks/SparqlSelectCustomTask.scala | 14 ++++-- .../LocalSparqlSelectExecutorTest.scala | 35 +++++++++++++- 3 files changed, 86 insertions(+), 9 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index 2266a0e6bd..c0b20189be 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -2,7 +2,7 @@ package org.silkframework.plugins.dataset.rdf.executors import org.silkframework.config.{Prefixes, Task, TaskSpec} import org.silkframework.dataset.{DataSource, DatasetSpec} -import org.silkframework.dataset.rdf.{RdfDataset, SparqlEndpoint, SparqlResults} +import org.silkframework.dataset.rdf.{RdfDataset, RdfNode, SparqlEndpoint, SparqlResults} import org.silkframework.entity.Entity import org.silkframework.execution.local.{GenericEntityTable, LocalEntities, LocalExecution, LocalExecutor} import 
org.silkframework.execution.typed.SparqlEndpointEntitySchema @@ -14,6 +14,8 @@ import org.silkframework.runtime.activity.{ActivityContext, UserContext} import org.silkframework.runtime.iterator.CloseableIterator import org.silkframework.runtime.plugin.PluginContext +import scala.collection.immutable.SortedMap + /** * Local executor for [[SparqlSelectCustomTask]]. */ @@ -35,6 +37,10 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT val rdfDataset = DefaultRdfDataset.resolve() val entities = executeOnDefaultDataset(taskData, rdfDataset, output.task, executionReportUpdater = Some(executionReportUpdater)) Some(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task)) + case Seq(input) if taskData.useDefaultDataset => + val rdfDataset = DefaultRdfDataset.resolve() + val entities = executeOnDefaultDatasetPerEntity(taskData, rdfDataset, input, output.task, executionReportUpdater = Some(executionReportUpdater)) + Some(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task)) case _ => throw TaskException("SPARQL select executor did not receive a SPARQL endpoint as requested!") } @@ -58,6 +64,36 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT runSelect(sparqlSelectTask, rdfDataset.sparqlEndpoint, None, outputTask, limit, executionReportUpdater) } + def executeOnDefaultDatasetPerEntity(sparqlSelectTask: SparqlSelectCustomTask, + rdfDataset: RdfDataset, + input: LocalEntities, + outputTask: Option[Task[_ <: TaskSpec]], + limit: Int = Integer.MAX_VALUE, + executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]) + (implicit pluginContext: PluginContext): CloseableIterator[Entity] = { + implicit val user: UserContext = pluginContext.user + val sparqlEndpoint = rdfDataset.sparqlEndpoint + val selectLimit = math.min(sparqlSelectTask.intLimit.getOrElse(Integer.MAX_VALUE), limit) + val taskProperties = TaskProperties.create(Some(input.task), outputTask, pluginContext) + 
val templateVariables = pluginContext.templateVariables.all.variables + val expectedSchema = sparqlSelectTask.expectedInputSchema + val vars = getSparqlVars(sparqlSelectTask) + + val bindings = input.entities.flatMap { entity => + val values = expectedSchema.typedPaths.map(tp => entity.valueOfPath(tp.toUntypedPath)) + if (values.forall(_.nonEmpty)) { + val projected = Entity(entity.uri, values, expectedSchema) + val queries = sparqlSelectTask.queryTemplate.generate(Some(projected), taskProperties, templateVariables) + queries.iterator.flatMap { query => + executeSelect(sparqlEndpoint, query, selectLimit, Some(sparqlSelectTask.sparqlTimeout)).bindings + } + } else { + Iterator.empty + } + } + createEntities(sparqlSelectTask, bindings, vars, executionReportUpdater) + } + private def runSelect(sparqlSelectTask: SparqlSelectCustomTask, sparqlEndpoint: SparqlEndpoint, inputTask: Option[Task[_ <: TaskSpec]], @@ -72,7 +108,7 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT val query = sparqlSelectTask.queryTemplate.generate(None, taskProperties, templateVariables).head val results = executeSelect(sparqlEndpoint, query, selectLimit, Some(sparqlSelectTask.sparqlTimeout)) val vars: IndexedSeq[String] = getSparqlVars(sparqlSelectTask) - createEntities(sparqlSelectTask, results, vars, executionReportUpdater) + createEntities(sparqlSelectTask, results.bindings, vars, executionReportUpdater) } /** @@ -110,12 +146,12 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT } private def createEntities(taskData: SparqlSelectCustomTask, - results: SparqlResults, + bindings: CloseableIterator[SortedMap[String, RdfNode]], vars: IndexedSeq[String], executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]): CloseableIterator[Entity] = { implicit val prefixes: Prefixes = Prefixes.empty var schemaReported = false - val increase: (Entity => Unit) = (entity: Entity) => executionReportUpdater match { + val increase: 
Entity => Unit = (entity: Entity) => executionReportUpdater match { case Some(updater) => if (!schemaReported) { schemaReported = true @@ -127,7 +163,7 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT } var count = 0 - val entityIterator = results.bindings.map { binding => + val entityIterator = bindings.map { binding => count += 1 val values = vars map { v => binding.get(v).toSeq.map(_.value) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index b5fe4c3443..54a1f4d3f1 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -49,8 +49,8 @@ case class SparqlSelectCustomTask( optionalInputDataset: SparqlEndpointDatasetParameter = SparqlEndpointDatasetParameter(""), @Param( label = "Use default RDF dataset", - value = "If enabled, the task no longer exposes a SPARQL endpoint input port. Instead the SELECT query is submitted" + - " directly to the RDF dataset." + value = "If enabled, the SELECT query is submitted directly to the configured default RDF dataset." + + " If the query template references input entities, one query is generated per input entity." 
) useDefaultDataset: Boolean = false, @Param( @@ -73,9 +73,17 @@ case class SparqlSelectCustomTask( val queryTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, selectQuery.str) + def isStaticTemplate: Boolean = queryTemplate.isStaticTemplate + + def expectedInputSchema: EntitySchema = queryTemplate.inputSchema + override def inputPorts: InputPorts = { if (useDefaultDataset) { - InputPorts.NoInputPorts + if (isStaticTemplate) { + InputPorts.NoInputPorts + } else { + FixedNumberOfInputs(Seq(FixedSchemaPort(expectedInputSchema))) + } } else { FixedNumberOfInputs(Seq(FixedSchemaPort(SparqlEndpointEntitySchema.schema))) } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala index 51a6878746..8ac78499a3 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala @@ -6,7 +6,9 @@ import org.scalatest.matchers.must.Matchers import org.silkframework.config.{PlainTask, Task} import org.silkframework.dataset.rdf._ import org.silkframework.dataset.{DataSource, DatasetSpec, EntitySink, LinkSink} -import org.silkframework.entity.Entity +import org.silkframework.entity.paths.TypedPath +import org.silkframework.entity.{Entity, EntitySchema, ValueType} +import org.silkframework.execution.local.GenericEntityTable import org.silkframework.plugins.dataset.rdf.tasks.SparqlSelectCustomTask import org.silkframework.runtime.activity.{TestUserContextTrait, UserContext} import org.silkframework.runtime.iterator.{CloseableIterator, TraversableIterator} @@ -64,6 +66,37 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec correctTimeout mustBe 
true } + it should "generate one query per input entity when useDefaultDataset is set and the template references entity values" in { + val query = """SELECT ?p ?o WHERE { <{{ input.entity.s }}> ?p ?o }""" + val rowsPerQuery = 2 + val task = SparqlSelectCustomTask(query, limit = rowsPerQuery.toString, useDefaultDataset = true) + + val capturedQueries = collection.mutable.ArrayBuffer.empty[String] + val sparqlEndpoint = sparqlEndpointStub(queryCapture = q => capturedQueries += q) + val stubDataset = new StubRdfDataset(sparqlEndpoint) + + val inputSchema = EntitySchema("", typedPaths = IndexedSeq(TypedPath("s", ValueType.URI))) + val inputEntities = Seq( + Entity("urn:in:1", IndexedSeq(Seq("http://example.org/a")), inputSchema), + Entity("urn:in:2", IndexedSeq(Seq("http://example.org/b")), inputSchema), + Entity("urn:in:3", IndexedSeq(Seq()), inputSchema) // skipped: missing value + ) + val inputTable = GenericEntityTable(inputEntities, inputSchema, PlainTask("inputTask", DatasetSpec(stubDataset))) + + val activityContextMock = TestMocks.activityContextMock() + val reportUpdater = SparqlSelectExecutionReportUpdater(PlainTask("task", task), activityContextMock) + + val results = LocalSparqlSelectExecutor() + .executeOnDefaultDatasetPerEntity(task, stubDataset, inputTable, outputTask = None, executionReportUpdater = Some(reportUpdater)) + .toList + + capturedQueries.toSeq must have size 2 + capturedQueries(0) must include ("") + capturedQueries(1) must include ("") + // Bindings from both queries are flattened into the output: rowsPerQuery rows × 2 queries. 
+ results.size mustBe (rowsPerQuery * 2) + } + it should "evaluate a Jinja query template using the graph variable from the task parameters" in { val graphUri = "http://example.org/testGraph" val query = """SELECT * WHERE { GRAPH <{{ input.config.graph }}> { ?s ?p ?o } }""" From efba448c1b22d0e07ab45e51e78fa496c8557556 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 22 Apr 2026 16:50:36 +0200 Subject: [PATCH 44/63] Add default scope parameter --- .../rdf/tasks/SparqlSelectCustomTask.md | 5 ++++ .../rdf/tasks/SparqlSelectCustomTask.scala | 13 ++++++++-- .../templating/SparqlJinjaTemplate.scala | 25 ++++++++++++++++--- .../rdf/tasks/templating/SparqlTemplate.scala | 8 ++++-- .../templating/SparqlTemplateJinjaTest.scala | 24 ++++++++++++++++++ 5 files changed, 68 insertions(+), 7 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index 43008aa9a8..d43ef19fd7 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -34,6 +34,11 @@ SELECT * WHERE { GRAPH <{{ input.config.graph | validate_uri }}> { ?s ?p ?o } } Parameter and variable names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); bracket-subscript access is not supported. +The `defaultScope` parameter (default `input.entity`) makes variables from one scope accessible directly, without +the scope prefix. With the default, a template may reference `{{ property }}` as a shorthand for +`{{ input.entity.property }}`; both forms resolve to the same value. Setting `defaultScope` to the empty string +disables this aliasing and requires every variable to be addressed with its full scope. 
+ Values are inserted verbatim by default, so URI brackets (`<...>`) and quotation marks around literals must be written in the template. The following filters are provided to render values safely: diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index 54a1f4d3f1..252fe5f46e 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -64,14 +64,23 @@ case class SparqlSelectCustomTask( autoCompletionProvider = classOf[TemplateEngineAutocompletionProvider], autoCompleteValueWithLabels = true ) - templatingMode: String = "jinja" + templatingMode: String = "jinja", + @Param( + label = "Default scope", + value = "Variables from this scope can be accessed without the scope prefix. " + + "For example, with default scope 'input.entity', a template may reference '{{ property }}' instead of '{{ input.entity.property }}'. " + + "Leave empty to disable." 
+ ) + defaultScope: String = "input.entity" ) extends CustomTask { val intLimit: Option[Int] = { // Only allow positive ints Try(limit.toInt).filter(_ > 0).toOption } - val queryTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, selectQuery.str) + private val defaultScopePath: Seq[String] = defaultScope.split('.').map(_.trim).filter(_.nonEmpty).toSeq + + val queryTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, selectQuery.str, defaultScopePath) def isStaticTemplate: Boolean = queryTemplate.isStaticTemplate diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala index 6963605e97..da8d67c7a8 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala @@ -21,8 +21,13 @@ import scala.util.{Failure, Success, Try} * {{ global. }} -- global template variable * * Entity property names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`) + * + * @param defaultScope If non-empty, every variable at this scope is also exposed at the top level of the + * Jinja context, so the template may reference it without the scope prefix. For example, + * with `defaultScope = Seq("input", "entity")`, a template may use `{{ property }}` in + * place of `{{ input.entity.property }}`. 
*/ -class SparqlJinjaTemplate(template: CompiledTemplate) extends SparqlTemplate { +class SparqlJinjaTemplate(template: CompiledTemplate, defaultScope: Seq[String] = Seq.empty) extends SparqlTemplate { import SparqlJinjaTemplate._ @@ -78,7 +83,14 @@ class SparqlJinjaTemplate(template: CompiledTemplate) extends SparqlTemplate { val outputConfig = taskProperties.outputTask.map { case (k, v) => new TemplateVariableValue(k, OUTPUT_CONFIG_SCOPE, Seq(v)) } - (inputConfig ++ inputEntity ++ outputConfig).toSeq ++ templateVariables + val scoped = (inputConfig ++ inputEntity ++ outputConfig).toSeq ++ templateVariables + val aliased = + if (defaultScope.nonEmpty) { + scoped.filter(_.scope == defaultScope).map(v => new TemplateVariableValue(v.name, Seq.empty, v.values)) + } else { + Seq.empty + } + scoped ++ aliased } private def referencedVariables: Seq[TemplateVariableName] = { @@ -86,7 +98,14 @@ class SparqlJinjaTemplate(template: CompiledTemplate) extends SparqlTemplate { } private def entityPropertyNames: Seq[String] = { - referencedVariables.filter(_.scope == INPUT_ENTITY_SCOPE).map(_.name).distinct + val scoped = referencedVariables.filter(_.scope == INPUT_ENTITY_SCOPE).map(_.name) + val aliased = + if (defaultScope == INPUT_ENTITY_SCOPE) { + referencedVariables.filter(_.scope.isEmpty).map(_.name) + } else { + Seq.empty + } + (scoped ++ aliased).distinct } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index 161afcbcef..3bbf21fa4f 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -59,12 +59,16 @@ object SparqlTemplate { /** * Creates a SPARQL template 
using the given template engine. + * + * @param defaultScope Scope whose variables are additionally exposed at the top level of the Jinja context, + * so templates may reference them without the scope prefix. Only honored by the Jinja + * implementation. Pass `Seq.empty` to disable aliasing. */ - def create(templateEngineId: String, template: String): SparqlTemplate = { + def create(templateEngineId: String, template: String, defaultScope: Seq[String] = Seq.empty): SparqlTemplate = { val engine = TemplateEngines.create(templateEngineId) val compiled = engine.compile(template) if (templateEngineId == JINJA_ENGINE_ID) { - new SparqlJinjaTemplate(compiled) + new SparqlJinjaTemplate(compiled, defaultScope) } else { new SparqlLegacyTemplate(compiled) } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala index 9304c9bfc4..1b97e52ec2 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala @@ -161,6 +161,30 @@ class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { } } + it should "alias input.entity variables to bare references when defaultScope = input.entity" in { + val template = SparqlTemplate.create(JinjaTemplateEngine.id, + """<{{ subject }}> "{{ input.entity.label }}"""", + defaultScope = Seq("input", "entity")) + template.inputSchema.typedPaths.flatMap(_.property).map(_.propertyUri.uri).toSet mustBe Set("subject", "label") + val rendered = template.generate( + entity = Some(entityFromMap(Map("subject" -> "urn:entity:1", "label" -> "hello"))), + taskProperties = TaskProperties(Map.empty, Map.empty) + ).head + rendered 
mustBe """<urn:entity:1> "hello"""" + } + + it should "alias variables from an arbitrary scope without polluting the input entity schema" in { + val template = SparqlTemplate.create(JinjaTemplateEngine.id, + """SELECT * WHERE { GRAPH <{{ graph }}> { ?s ?p ?o } }""", + defaultScope = Seq("input", "config")) + val rendered = template.generate( + entity = None, + taskProperties = TaskProperties(Map("graph" -> "urn:graph:1"), Map.empty) + ).head + rendered must include("<urn:graph:1>") + template.inputSchema.typedPaths mustBe empty + } + + it should "expose multi-valued entity properties as lists iterable in the Jinja template" in { val template = SparqlTemplate.create(JinjaTemplateEngine.id, """{% for s in input.entity.subject %}INSERT DATA { <{{ s }}> "x" } ; From 638578b20429b818b392783463138e43fd404013 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Thu, 23 Apr 2026 14:00:52 +0200 Subject: [PATCH 45/63] Update the order of the SparqlSelectCustomTask parameter --- .../rdf/tasks/SparqlSelectCustomTask.scala | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index 252fe5f46e..970b1890d4 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -23,10 +23,7 @@ import scala.util.Try @Plugin( id = "sparqlSelectOperator", label = "SPARQL Select query", - description = - "A task that executes a SPARQL Select query on a SPARQL enabled data source and outputs the SPARQL result." 
+ - " If the SPARQL source is defined on a specific graph, a FROM clause will be added to the query at execution time," + - " except when there already exists a GRAPH or FROM clause in the query. FROM NAMED clauses are not injected.", + description = "A task that executes a SPARQL Select query and outputs the SPARQL result.", documentationFile = "SparqlSelectCustomTask.md", iconFile = "sparql-select-query.svg" ) @@ -53,12 +50,6 @@ case class SparqlSelectCustomTask( " If the query template references input entities, one query is generated per input entity." ) useDefaultDataset: Boolean = false, - @Param( - label = "SPARQL query timeout (ms)", - value = "SPARQL query timeout (select/update) in milliseconds. A value of zero means that there is no timeout set explicitly." + - " If a value greater zero is specified this overwrites possible default timeouts." - ) - sparqlTimeout: Int = 0, @Param( value = "The templating mode for the template engine.", autoCompletionProvider = classOf[TemplateEngineAutocompletionProvider], @@ -67,11 +58,18 @@ case class SparqlSelectCustomTask( templatingMode: String = "jinja", @Param( label = "Default scope", - value = "Variables from this scope can be accessed without the scope prefix. " + + value = "Variables from this scope can be accessed without the scope prefix in Jinja. " + "For example, with default scope 'input.entity', a template may reference '{{ property }}' instead of '{{ input.entity.property }}'. " + "Leave empty to disable." ) - defaultScope: String = "input.entity" + defaultScope: String = "input.entity", + @Param( + label = "SPARQL query timeout (ms)", + value = "SPARQL query timeout (select/update) in milliseconds. A value of zero means that there is no timeout set explicitly." 
+ + " If a value greater zero is specified this overwrites possible default timeouts.", + advanced = true + ) + sparqlTimeout: Int = 0, ) extends CustomTask { val intLimit: Option[Int] = { // Only allow positive ints From 3a436e9ab21d542725d48ebbabfc6965bf689896 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Thu, 23 Apr 2026 14:17:00 +0200 Subject: [PATCH 46/63] Update the SparqlSelectCustomTask doc --- .../rdf/tasks/SparqlSelectCustomTask.md | 52 ++++++++++++------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index d43ef19fd7..f562209724 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -1,16 +1,19 @@ -The SPARQL SELECT plugin is a task for executing SPARQL SELECT queries on the input RDF data source. +The SPARQL SELECT plugin is a task for executing SPARQL SELECT queries on an RDF data source. -## Description +It can be used in a workflow, connecting an input to an output. The _output_ is an entity table containing the _SPARQL results_ of the query +execution. A [SPARQL 1.1 SELECT](https://www.w3.org/TR/sparql11-query/#select) query is supported; the simplest +example is `SELECT * WHERE { ?s ?p ?o }`. -The `sparqlSelectOperator` plugin is an example of a _RDF task_ or _operator_. Such a task can be used in a workflow, -connecting an input to an output. In this specific case, the _input_ is — in essence — a _SPARQL endpoint_ and the -_output_ is the entity table containing the _SPARQL results_ of the SPARQL SELECT query execution. 
+The _input_ depends on the configuration: -In general terms, a [SPARQL 1.1 SELECT](https://www.w3.org/TR/sparql11-query/#select) query is supported. One of the -simplest examples is `SELECT * WHERE { ?s ?p ?o }`. +- By default, the query is executed against the connected input, which must be a _SPARQL endpoint_ + (i.e. an RDF dataset). +- When **Use default RDF dataset** (`useDefaultDataset`) is enabled, the query is executed against the project's + default RDF dataset instead. If the template references input entity properties, the task accepts an entity + input and generates one query per entity; otherwise it needs no input at all. -The [result limit](https://www.w3.org/TR/sparql11-query/#modResultLimit) can be specified for the SPARQL SELECT plugin -itself, with the parameter `limit`. Additionally, a timeout can be specified with the parameter `sparqlTimeout`. +The [result limit](https://www.w3.org/TR/sparql11-query/#modResultLimit) can be restricted with the `limit` +parameter. A query timeout (in milliseconds) can be set via `sparqlTimeout`. As usual, the SPARQL results contain both "variables" and "bindings", such as in [this example](https://www.w3.org/TR/sparql11-results-json/#json-result-object). @@ -18,10 +21,16 @@ This tabular raw form is transformed into an _entity table_. ### Templating -The select query supports [Jinja](https://jinja.palletsprojects.com/) templating. The following variables are available: +The select query is rendered by a template engine. +[`Jinja`](https://jinja.palletsprojects.com/) is the default and is described below; for the deprecated `Simple` +and `Velocity Engine` modes, see the "Legacy template engines" section further down. + +The following variables are available: - `input.config.`: a parameter of the connected input task. - `output.config.`: a parameter of the connected output task. +- `input.entity.`: the value of the given property on the current input entity. 
Only populated when + the task is configured to receive input entities (see **Use default RDF dataset** above). - `project.`: a project-scoped template variable. - `global.`: a global template variable. @@ -31,13 +40,14 @@ For example, to query the named graph that is configured on the input dataset: SELECT * WHERE { GRAPH <{{ input.config.graph | validate_uri }}> { ?s ?p ?o } } ``` -Parameter and variable names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); bracket-subscript access -is not supported. +Parameter, property and variable names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); +bracket-subscript access such as `input.entity["urn:prop:label"]` is not supported. -The `defaultScope` parameter (default `input.entity`) makes variables from one scope accessible directly, without -the scope prefix. With the default, a template may reference `{{ property }}` as a shorthand for -`{{ input.entity.property }}`; both forms resolve to the same value. Setting `defaultScope` to the empty string -disables this aliasing and requires every variable to be addressed with its full scope. +The `defaultScope` parameter declares one scope whose variables are additionally exposed at the top level of the +template context, so they can be referenced without the scope prefix. It defaults to `input.entity`: a template +may write `{{ property }}` as a shorthand for `{{ input.entity.property }}`, and both forms resolve to the same +value. Set `defaultScope` to the empty string to disable this aliasing and require every variable to be addressed +with its full scope. Values are inserted verbatim by default, so URI brackets (`<...>`) and quotation marks around literals must be written in the template. The following filters are provided to render values safely: @@ -55,12 +65,18 @@ All transformer plugins are also available as Jinja filters under their plugin i The output schema (i.e. 
the result variables) is derived from the query at configuration time by evaluating the template with default values, so the query must remain valid SPARQL regardless of the parameter values. -### Internal Specifics +### Automatic `FROM` clause injection If the SPARQL source is defined on a specific graph, a `FROM` clause will be added to the query at execution time, except when there already exists a `GRAPH` or `FROM` clause in the query. `FROM NAMED` clauses are not injected. -## Related plugins +### Legacy template engines + +In addition to Jinja, two deprecated template engines are supported for backwards compatibility: `Simple` +and [`Velocity Engine`](https://velocity.apache.org/engine/2.4.1/user-guide.html). Their syntax is identical +to the one used by the `SPARQL Update operator` and is documented there. + +### Related plugins Other types of RDF tasks are the `sparqlCopyOperator` for executing SPARQL CONSTRUCT queries, and the `sparqlUpdateOperator` for building SPARQL UPDATE queries from a templating engine. From 116b6444da9e53b317725e9b565637e01be56f53 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Thu, 23 Apr 2026 14:45:27 +0200 Subject: [PATCH 47/63] Fix merge issues --- libs/gui-elements | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/gui-elements b/libs/gui-elements index d5f36c7ec9..64359f57da 160000 --- a/libs/gui-elements +++ b/libs/gui-elements @@ -1 +1 @@ -Subproject commit d5f36c7ec978c031a63cb2f09777515cb271de03 +Subproject commit 64359f57dad3410bef34cc87d2f1e8a7cd7b8708 From 9ae3a440cdcf518f0403f8d6284c653933f194c4 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Fri, 24 Apr 2026 14:26:13 +0200 Subject: [PATCH 48/63] Remove incomplete validation of SPARQL jinja templates. Replaced by heuristic to get the output schema. 
--- .../rdf/tasks/SparqlSelectCustomTask.md | 5 +- .../rdf/tasks/SparqlSelectCustomTask.scala | 25 ++---- .../templating/SparqlJinjaTemplate.scala | 21 ++++- .../templating/SparqlLegacyTemplate.scala | 18 +++- .../templating/SparqlSelectVarExtractor.scala | 87 +++++++++++++++++++ .../rdf/tasks/templating/SparqlTemplate.scala | 22 ++--- ...st.scala => SparqlJinjaTemplateTest.scala} | 2 +- .../SparqlSelectVarExtractorTest.scala | 83 ++++++++++++++++++ 8 files changed, 221 insertions(+), 42 deletions(-) create mode 100644 silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractor.scala rename silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/{SparqlTemplateJinjaTest.scala => SparqlJinjaTemplateTest.scala} (99%) create mode 100644 silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractorTest.scala diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index a1e2453df3..c8bdd87980 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -62,8 +62,9 @@ written in the template. The following filters are provided to render values saf All transformer plugins are also available as Jinja filters under their plugin id (for example `lowerCase`, `trim`, `urlEncode`). -The output schema (i.e. the result variables) is derived from the query at configuration time by evaluating the -template with default values, so the query must remain valid SPARQL regardless of the parameter values. +The output schema (i.e. 
the result variables) is derived from the query via a heuristic on the raw template text, +without evaluating it. If the heuristic cannot determine any output variables (for example, when the `SELECT` clause itself is produced +by a Jinja expression), the output port is reported with an unknown schema instead. ### Automatic `FROM` clause injection diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index cfd50cd3ac..3326cdc39f 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -1,20 +1,15 @@ package org.silkframework.plugins.dataset.rdf.tasks -import org.apache.jena.query.QueryFactory import org.silkframework.config._ import org.silkframework.dataset.rdf.SparqlEndpointDatasetParameter import org.silkframework.entity._ -import org.silkframework.entity.paths.{TypedPath, UntypedPath} import org.silkframework.execution.typed.SparqlEndpointEntitySchema import org.silkframework.plugins.dataset.rdf.datasets.SparqlDataset import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlTemplate import org.silkframework.runtime.plugin.annotations.{Param, Plugin, PluginReference} import org.silkframework.runtime.plugin.types.SparqlCodeParameter import org.silkframework.runtime.templating.TemplateEngineAutocompletionProvider -import org.silkframework.runtime.validation.ValidationException -import org.silkframework.util.Uri -import scala.jdk.CollectionConverters.ListHasAsScala import scala.util.Try /** @@ -108,22 +103,14 @@ case class SparqlSelectCustomTask( } override def outputPort: Option[Port] = { - Some(FixedSchemaPort(outputSchema)) - } - - val outputSchema: 
EntitySchema = { - val query = QueryFactory.create(queryTemplate.generateWithDefaults()) - if (!query.isSelectType) { - throw new ValidationException("Query is not a SELECT query!") - } - val typedPaths = query.getResultVars.asScala map { v => - TypedPath(UntypedPath(v), ValueType.STRING, isAttribute = false) + if (outputSchema.typedPaths.isEmpty) { + Some(UnknownSchemaPort) + } else { + Some(FixedSchemaPort(outputSchema)) } - EntitySchema( - typeUri = Uri(""), - typedPaths = typedPaths.toIndexedSeq - ) } + + val outputSchema: EntitySchema = queryTemplate.outputSchema } object SparqlSelectCustomTask { diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala index da8d67c7a8..540be5466e 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala @@ -1,10 +1,11 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating -import org.silkframework.entity.{Entity, EntitySchema} -import org.silkframework.entity.paths.UntypedPath +import org.silkframework.entity.paths.{TypedPath, UntypedPath} +import org.silkframework.entity.{Entity, EntitySchema, ValueType} import org.silkframework.execution.local.EmptyEntityTable -import org.silkframework.runtime.templating.{CompiledTemplate, TemplateVariableConversions, TemplateVariableName, TemplateVariableValue} +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateEngines, TemplateVariableConversions, TemplateVariableName, TemplateVariableValue} import org.silkframework.runtime.validation.ValidationException +import org.silkframework.util.Uri import java.io.StringWriter import 
scala.util.{Failure, Success, Try} @@ -22,15 +23,19 @@ import scala.util.{Failure, Success, Try} * * Entity property names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`) * + * @param rawTemplate The raw template source. Compiled internally via the Jinja engine and also used + * to derive the output schema heuristically without rendering. * @param defaultScope If non-empty, every variable at this scope is also exposed at the top level of the * Jinja context, so the template may reference it without the scope prefix. For example, * with `defaultScope = Seq("input", "entity")`, a template may use `{{ property }}` in * place of `{{ input.entity.property }}`. */ -class SparqlJinjaTemplate(template: CompiledTemplate, defaultScope: Seq[String] = Seq.empty) extends SparqlTemplate { +class SparqlJinjaTemplate(rawTemplate: String, defaultScope: Seq[String] = Seq.empty) extends SparqlTemplate { import SparqlJinjaTemplate._ + private val template: CompiledTemplate = TemplateEngines.create(JINJA_ENGINE_ID).compile(rawTemplate) + override def generate(entity: Option[Entity], taskProperties: TaskProperties, templateVariables: Seq[TemplateVariableValue] = Seq.empty): Iterable[String] = { @@ -69,6 +74,12 @@ class SparqlJinjaTemplate(template: CompiledTemplate, defaultScope: Seq[String] } } + override lazy val outputSchema: EntitySchema = { + val vars = SparqlSelectVarExtractor.extractSelectVars(rawTemplate) + val paths = vars.map(v => TypedPath(UntypedPath(v), ValueType.STRING, isAttribute = false)) + EntitySchema(typeUri = Uri(""), typedPaths = paths.toIndexedSeq) + } + override def isStaticTemplate: Boolean = { entityPropertyNames.isEmpty } @@ -111,6 +122,8 @@ class SparqlJinjaTemplate(template: CompiledTemplate, defaultScope: Seq[String] object SparqlJinjaTemplate { + private[templating] final val JINJA_ENGINE_ID = "jinja" + private[templating] final val INPUT_CONFIG_SCOPE: Seq[String] = Seq("input", "config") private[templating] final val INPUT_ENTITY_SCOPE: 
Seq[String] = Seq("input", "entity") private[templating] final val OUTPUT_CONFIG_SCOPE: Seq[String] = Seq("output", "config") diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala index 9dff1b64a7..3c9cf27846 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala @@ -1,13 +1,16 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating -import org.silkframework.entity.{Entity, EntitySchema} -import org.silkframework.entity.paths.UntypedPath +import org.apache.jena.query.QueryFactory +import org.silkframework.entity.paths.{TypedPath, UntypedPath} +import org.silkframework.entity.{Entity, EntitySchema, ValueType} import org.silkframework.execution.local.EmptyEntityTable import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlLegacyTemplate._ import org.silkframework.runtime.templating.{CompiledTemplate, TemplateMethodUsage, TemplateVariableName, TemplateVariableValue} import org.silkframework.runtime.validation.ValidationException +import org.silkframework.util.Uri import java.io.StringWriter +import scala.jdk.CollectionConverters.ListHasAsScala import scala.util.{Failure, Success, Try} /** @@ -76,6 +79,17 @@ class SparqlLegacyTemplate(template: CompiledTemplate) extends SparqlTemplate { } } + override lazy val outputSchema: EntitySchema = { + val query = QueryFactory.create(generateWithDefaults()) + if (!query.isSelectType) { + throw new ValidationException("Query is not a SELECT query!") + } + val typedPaths = query.getResultVars.asScala.map { v => + TypedPath(UntypedPath(v), ValueType.STRING, isAttribute = false) + } + 
EntitySchema(typeUri = Uri(""), typedPaths = typedPaths.toIndexedSeq) + } + override def isStaticTemplate: Boolean = { sparqlVariables match { case Some(vars) => vars.isEmpty diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractor.scala new file mode 100644 index 0000000000..3851c86073 --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractor.scala @@ -0,0 +1,87 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +/** + * Best-effort heuristic that extracts the projected result variables from a SPARQL SELECT query. + * + * Unlike a full SPARQL parser, this works on raw template text that may still contain unevaluated + * placeholders (Jinja, Velocity, ...). It is used where rendering-with-defaults would fail because + * placeholders sit in contexts that do not accept a seed URI (string literals, numeric limits, etc.). + * + * Strategy: + * + * 1. Locate the first `SELECT` keyword (word-bounded, case-insensitive). + * 2. Find the end of the projection clause: the first `WHERE` / `FROM` keyword or `{`. + * 3. Strip a leading `DISTINCT` / `REDUCED`. + * 4. If the projection is `*`, fall back to collecting every distinct `?var` token in the full query. + * 5. Otherwise, walk the projection tracking parenthesis depth. At depth 0 collect `?var` directly. + * For each `( ... )` group, extract the alias of the last `AS ?alias` inside it. + * + * Returns an empty sequence when no match can be found (non-SELECT templates, or templates whose + * projection itself is produced by a placeholder). 
+ */ +object SparqlSelectVarExtractor { + + private val selectKeywordPattern = """(?i)\bSELECT\b""".r + private val whereKeywordPattern = """(?i)\bWHERE\b""".r + private val fromKeywordPattern = """(?i)\bFROM\b""".r + private val distinctReducedPattern = """(?i)^(?:DISTINCT|REDUCED)\s+""".r + private val anyVarPattern = """\?([A-Za-z_][A-Za-z0-9_]*)""".r + private val asAliasPattern = """(?i)\bAS\s+\?([A-Za-z_][A-Za-z0-9_]*)""".r + + def extractSelectVars(query: String): Seq[String] = { + selectKeywordPattern.findFirstMatchIn(query) match { + case None => Seq.empty + case Some(m) => + val afterSelect = query.substring(m.end) + val boundary = projectionBoundary(afterSelect) + val projection = distinctReducedPattern.replaceFirstIn(afterSelect.substring(0, boundary).trim, "") + if (projection.trim == "*") { + anyVarPattern.findAllMatchIn(query).map(_.group(1)).toSeq.distinct + } else { + extractProjectedVars(projection) + } + } + } + + private def projectionBoundary(afterSelect: String): Int = { + val candidates = Seq( + whereKeywordPattern.findFirstMatchIn(afterSelect).map(_.start), + fromKeywordPattern.findFirstMatchIn(afterSelect).map(_.start), + Some(afterSelect.indexOf('{')).filter(_ >= 0) + ).flatten + if (candidates.isEmpty) afterSelect.length else candidates.min + } + + private def extractProjectedVars(projection: String): Seq[String] = { + val vars = scala.collection.mutable.ArrayBuffer.empty[String] + var depth = 0 + var parenStart = 0 + var i = 0 + while (i < projection.length) { + projection.charAt(i) match { + case '(' => + if (depth == 0) parenStart = i + depth += 1 + i += 1 + case ')' => + depth -= 1 + if (depth == 0) { + val content = projection.substring(parenStart + 1, i) + asAliasPattern.findAllMatchIn(content).toSeq.lastOption.foreach(m => vars += m.group(1)) + } + i += 1 + case '?' 
if depth == 0 => + val start = i + 1 + var j = start + while (j < projection.length && isVarChar(projection.charAt(j))) j += 1 + if (j > start) vars += projection.substring(start, j) + i = j + case _ => + i += 1 + } + } + vars.toSeq.distinct + } + + private def isVarChar(c: Char): Boolean = c.isLetterOrDigit || c == '_' +} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index 3bbf21fa4f..b338b73f1e 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -25,13 +25,9 @@ trait SparqlTemplate { /** * Renders the template. * - * @param entity The current input entity, or `None` for static templates / Select queries. - * The Jinja implementation exposes each entity property as a list of values - * under `input.entity.*`. The legacy implementation iterates over the - * cross-product of property values and emits one query per combination. + * @param entity The current input entity, or `None` for static templates. * @param taskProperties Parameter values of the connected input and output tasks. - * @param templateVariables Project and global template variables (scoped as `Seq("project")` / `Seq("global")`). - * Only used by the Jinja implementation; the legacy implementation ignores them. + * @param templateVariables Project and global template variables * @return One rendered query for Jinja, or one query per cross-product combination for the legacy engine. */ def generate(entity: Option[Entity], @@ -47,16 +43,15 @@ trait SparqlTemplate { /** Entity schema that the template expects on its input port. 
*/ def inputSchema: EntitySchema + /** Output schema projected by a SELECT query. Unused for UPDATE templates. */ + def outputSchema: EntitySchema + /** True if the template does not reference any entity values and thus needs no input port. */ def isStaticTemplate: Boolean } object SparqlTemplate { - // Must match JinjaTemplateEngine.id. Duplicated here because silk-plugins-rdf does not depend on - // silk-plugins-templating-jinja at compile time (only at test scope). - private final val JINJA_ENGINE_ID = "jinja" - /** * Creates a SPARQL template using the given template engine. * @@ -65,11 +60,10 @@ object SparqlTemplate { * implementation. Pass `Seq.empty` to disable aliasing. */ def create(templateEngineId: String, template: String, defaultScope: Seq[String] = Seq.empty): SparqlTemplate = { - val engine = TemplateEngines.create(templateEngineId) - val compiled = engine.compile(template) - if (templateEngineId == JINJA_ENGINE_ID) { - new SparqlJinjaTemplate(compiled, defaultScope) + if (templateEngineId == SparqlJinjaTemplate.JINJA_ENGINE_ID) { + new SparqlJinjaTemplate(template, defaultScope) } else { + val compiled = TemplateEngines.create(templateEngineId).compile(template) new SparqlLegacyTemplate(compiled) } } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala similarity index 99% rename from silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala rename to silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala index 1b97e52ec2..bd48d9d887 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateJinjaTest.scala +++ 
b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala @@ -9,7 +9,7 @@ import org.silkframework.runtime.templating.TemplateVariableValue import org.silkframework.runtime.templating.exceptions.{TemplateEvaluationException, UnboundVariablesException} import org.silkframework.runtime.validation.ValidationException -class SparqlTemplateJinjaTest extends AnyFlatSpec with Matchers { +class SparqlJinjaTemplateTest extends AnyFlatSpec with Matchers { behavior of "SPARQL templating with the Jinja Template Engine" diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractorTest.scala new file mode 100644 index 0000000000..35e02ef666 --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractorTest.scala @@ -0,0 +1,83 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.must.Matchers + +class SparqlSelectVarExtractorTest extends AnyFlatSpec with Matchers { + + behavior of "SparqlSelectVarExtractor" + + it should "extract plain projected variables" in { + extract("SELECT ?a ?b WHERE { ?a ?p ?b }") mustBe Seq("a", "b") + } + + it should "strip DISTINCT" in { + extract("SELECT DISTINCT ?x ?y WHERE { ?x ?p ?y }") mustBe Seq("x", "y") + } + + it should "strip REDUCED" in { + extract("SELECT REDUCED ?x ?y WHERE { ?x ?p ?y }") mustBe Seq("x", "y") + } + + it should "return the alias of a single-expression projection" in { + extract("SELECT (?x + 1 AS ?sum) WHERE { ?x ?p ?o }") mustBe Seq("sum") + } + + it should "return only the outer AS alias for nested function calls" in { + extract("SELECT (COUNT(?x) AS ?n) WHERE { ?x ?p ?o 
}") mustBe Seq("n") + } + + it should "mix plain variables and AS aliases" in { + extract("SELECT ?a (?x + 1 AS ?sum) ?b WHERE { ?a ?p ?b }") mustBe Seq("a", "sum", "b") + } + + it should "fall back to all variables for SELECT *" in { + extract("SELECT * WHERE { ?s ?p ?o }") mustBe Seq("s", "p", "o") + } + + it should "fall back to all variables for SELECT * with a GRAPH clause" in { + extract("SELECT * WHERE { GRAPH { ?s ?p ?o } }") mustBe Seq("s", "p", "o") + } + + it should "be case-insensitive on SELECT / WHERE / DISTINCT / AS" in { + extract("select distinct ?a (?x + 1 as ?sum) where { ?a ?p ?o }") mustBe Seq("a", "sum") + } + + it should "tolerate a Jinja placeholder inside a string literal" in { + extract("""SELECT ?s WHERE { ?s rdfs:label "{{ input.entity.name }}" }""") mustBe Seq("s") + } + + it should "tolerate a Jinja placeholder in a numeric position" in { + extract("SELECT ?s WHERE { ?s ?p ?o } LIMIT {{ input.config.max }}") mustBe Seq("s") + } + + it should "tolerate a Jinja placeholder as a URI fragment" in { + extract("SELECT ?s WHERE { ?s a <{{ input.config.type }}> }") mustBe Seq("s") + } + + it should "accept projections terminated by a brace without a WHERE keyword" in { + extract("SELECT ?a ?b { ?a ?p ?b }") mustBe Seq("a", "b") + } + + it should "deduplicate while preserving first-appearance order" in { + extract("SELECT ?a ?b ?a WHERE { ?a ?p ?b }") mustBe Seq("a", "b") + } + + it should "return the outer projection, not inner sub-query variables" in { + extract("SELECT ?a WHERE { SELECT ?x ?y WHERE { ?x ?p ?y } }") mustBe Seq("a") + } + + it should "stop at FROM named graph clauses" in { + extract("SELECT ?a ?b FROM WHERE { ?a ?p ?b }") mustBe Seq("a", "b") + } + + it should "return an empty sequence for non-SELECT queries" in { + extract("INSERT DATA { }") mustBe empty + } + + it should "return an empty sequence for an ASK query" in { + extract("ASK WHERE { ?s ?p ?o }") mustBe empty + } + + private def extract(query: String): Seq[String] = 
SparqlSelectVarExtractor.extractSelectVars(query) +} From 22d4005fc4ec773b6aaa7ed81f8f0ec3cf1f7265 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Fri, 24 Apr 2026 14:37:52 +0200 Subject: [PATCH 49/63] Improved doc for SparqlSelectCustomTask --- .../rdf/tasks/SparqlSelectCustomTask.md | 64 ++++++++++++------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index c8bdd87980..cdee91e90f 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -1,8 +1,9 @@ The SPARQL SELECT plugin is a task for executing SPARQL SELECT queries on an RDF data source. +It can be used in a workflow, connecting an input to an output. A +[SPARQL 1.1 SELECT](https://www.w3.org/TR/sparql11-query/#select) query is supported; the simplest example is +`SELECT * WHERE { ?s ?p ?o }`. -It can be used in a workflow, connecting an input to an output. The _output_ is an entity table containing the _SPARQL results_ of the query -execution. A [SPARQL 1.1 SELECT](https://www.w3.org/TR/sparql11-query/#select) query is supported; the simplest -example is `SELECT * WHERE { ?s ?p ?o }`. +## Input and output The _input_ depends on the configuration: @@ -12,18 +13,27 @@ The _input_ depends on the configuration: default RDF dataset instead. If the template references input entity properties, the task accepts an entity input and generates one query per entity; otherwise it needs no input at all. -The [result limit](https://www.w3.org/TR/sparql11-query/#modResultLimit) can be restricted with the `limit` -parameter. 
A query timeout (in milliseconds) can be set via `sparqlTimeout`. +The _output_ is an entity table built from the query's +[SPARQL results](https://www.w3.org/TR/sparql11-results-json/#json-result-object): each projected variable becomes +a column, and each result binding becomes a row. -As usual, the SPARQL results contain both "variables" and "bindings", such as in -[this example](https://www.w3.org/TR/sparql11-results-json/#json-result-object). -This tabular raw form is transformed into an _entity table_. +The [result size](https://www.w3.org/TR/sparql11-query/#modResultLimit) can be capped with the `limit` parameter, +and a query timeout (in milliseconds) can be set via `sparqlTimeout`. -### Templating +## Automatic `FROM` clause injection -The select query is rendered by a template engine. +If the SPARQL source is defined on a specific graph, a `FROM` clause will be added to the query at execution time, +except when there already exists a `GRAPH` or `FROM` clause in the query. `FROM NAMED` clauses are not injected. + +## Templating + +The select query is rendered by a template engine before execution. [`Jinja`](https://jinja.palletsprojects.com/) is the default and is described below; for the deprecated `Simple` -and `Velocity Engine` modes, see the "Legacy template engines" section further down. +and `Velocity Engine` modes, see "Legacy template engines" at the end. + +Jinja uses `{{ ... }}` for value expressions and `{% ... %}` for control flow such as conditionals. + +### Template variables The following variables are available: @@ -34,20 +44,29 @@ The following variables are available: - `project.`: a project-scoped template variable. - `global.`: a global template variable. +Parameter, property and variable names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); +bracket-subscript access such as `input.entity["urn:prop:label"]` is not supported. 
+ For example, to query the named graph that is configured on the input dataset: ```sparql SELECT * WHERE { GRAPH <{{ input.config.graph | validate_uri }}> { ?s ?p ?o } } ``` -Parameter, property and variable names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); -bracket-subscript access such as `input.entity["urn:prop:label"]` is not supported. +### Default scope The `defaultScope` parameter declares one scope whose variables are additionally exposed at the top level of the -template context, so they can be referenced without the scope prefix. It defaults to `input.entity`: a template -may write `{{ property }}` as a shorthand for `{{ input.entity.property }}`, and both forms resolve to the same -value. Set `defaultScope` to the empty string to disable this aliasing and require every variable to be addressed -with its full scope. +template context, so they can be referenced without the scope prefix. It defaults to `input.entity`, which means +a template may write `{{ property }}` as a shorthand for `{{ input.entity.property }}`: + +``` +{{ property }} ≡ {{ input.entity.property }} +``` + +Both forms resolve to the same value. Set `defaultScope` to the empty string to disable this aliasing and require +every variable to be addressed with its full scope. + +### Filters Values are inserted verbatim by default, so URI brackets (`<...>`) and quotation marks around literals must be written in the template. The following filters are provided to render values safely: @@ -62,14 +81,11 @@ written in the template. The following filters are provided to render values saf All transformer plugins are also available as Jinja filters under their plugin id (for example `lowerCase`, `trim`, `urlEncode`). -The output schema (i.e. the result variables) is derived from the query via a heuristic on the raw template text, -without evaluating it. 
If the heuristic cannot determine any output variables (for example, when the `SELECT` clause itself is produced -by a Jinja expression), the output port is reported with an unknown schema instead. - -### Automatic `FROM` clause injection +### Output schema inference -If the SPARQL source is defined on a specific graph, a `FROM` clause will be added to the query at execution time, -except when there already exists a `GRAPH` or `FROM` clause in the query. `FROM NAMED` clauses are not injected. +The output schema (i.e. the result variables) is derived from the query via a heuristic on the raw template text, +without evaluating it. If the heuristic cannot determine any output variables (for example, when the `SELECT` +clause itself is produced by a Jinja expression), the output port is reported with an unknown schema instead. ### Legacy template engines From 7d3809bff6ed2bb5c304b03c6b803c47d4b0522f Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Fri, 24 Apr 2026 16:39:22 +0200 Subject: [PATCH 50/63] Fix TemplateVariableJson: scope should stay a single string --- .../workspaceApi/coreApi/VariableTemplateApi.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala b/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala index 6babfd341a..f48e4a9be5 100644 --- a/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala +++ b/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala @@ -446,21 +446,21 @@ object VariableTemplateApi { ) isSensitive: Boolean, @Schema( - description = "The scope of the variable as a sequence of strings forming a prefix path, e.g. [\"project\"] or [\"project\", \"metaData\"].", + description = "The scope of the variable, e.g. 
\"project\".", requiredMode = RequiredMode.REQUIRED ) - scope: Seq[String]) { + scope: String) { def convert: TemplateVariable = { if (value.isEmpty && template.isEmpty) { throw new BadUserInputException("Either the variable value or its template has to be defined.") } - TemplateVariable(name, value.getOrElse(""), template, description, isSensitive, scope) + TemplateVariable(name, value.getOrElse(""), template, description, isSensitive, scope.split('.').toIndexedSeq) } } object TemplateVariableJson { def apply(variable: TemplateVariable): TemplateVariableJson = { - TemplateVariableJson(variable.name, Some(variable.value), variable.template, variable.description, variable.isSensitive, variable.scope) + TemplateVariableJson(variable.name, Some(variable.value), variable.template, variable.description, variable.isSensitive, variable.scope.mkString(".")) } } From 6f11ad2992467c549e7d70d07e53dbc6e4e23afa Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Fri, 24 Apr 2026 17:15:03 +0200 Subject: [PATCH 51/63] JinjaTemplateEngine: Need to set classloader for all evaluations. 
--- .../jinja/JinjaTemplateEngine.scala | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala index f654c095b3..a98bc5a16e 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala @@ -29,22 +29,33 @@ object JinjaTemplateEngine { private val interpreters = new ThreadLocal[JinjavaInterpreter] { override protected def initialValue(): JinjavaInterpreter = { - // There is a bug in Jinja 2.6.0, if a different context class loader is used: https://github.com/HubSpot/jinjava/issues/317 - val curClassLoader = Thread.currentThread.getContextClassLoader - try { - Thread.currentThread.setContextClassLoader(this.getClass.getClassLoader) + withPluginClassLoader { val config = JinjavaConfig.newBuilder.withFailOnUnknownTokens(true).build() val jinja = new Jinjava(config) TransformFilters.register(jinja.getGlobalContext) val interpreter = jinja.newInterpreter() JinjavaInterpreter.pushCurrent(interpreter) // Macros will request the current interpreter (thread-local) interpreter - } finally { - Thread.currentThread.setContextClassLoader(curClassLoader) } } } + /** + * Runs `body` with the thread context class loader set to this plugin's class loader. + * Jinjava loads its shaded EL `ExpressionFactory` via the context class loader (ServiceLoader), + * so both parsing and evaluation must run under a class loader that can see the plugin jar. + * See https://github.com/HubSpot/jinjava/issues/317. 
+ */ + def withPluginClassLoader[T](body: => T): T = { + val curClassLoader = Thread.currentThread.getContextClassLoader + try { + Thread.currentThread.setContextClassLoader(this.getClass.getClassLoader) + body + } finally { + Thread.currentThread.setContextClassLoader(curClassLoader) + } + } + /** * Retrieves an interpreter instance. */ @@ -119,7 +130,8 @@ class JinjaTemplate(val node: Node) extends CompiledTemplate { interpreter.getContext.put(key, value) } try { - writer.write(interpreter.render(node, false)) + val rendered = JinjaTemplateEngine.withPluginClassLoader(interpreter.render(node, false)) + writer.write(rendered) } catch { case ex: UnknownTokenException => throw new UnboundVariablesException(Seq(TemplateVariableName.parse(ex.getToken)), Some(ex)) From fb5cdc204a77ab5591d0dda35a5be8a0aa97db5f Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Fri, 24 Apr 2026 17:25:22 +0200 Subject: [PATCH 52/63] SparqlEndpointDatasetAutoCompletionProvider: Should not try to retrieve the label for empty values since this is used to signify no selection. 
--- .../SparqlEndpointDatasetAutoCompletionProvider.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlEndpointDatasetAutoCompletionProvider.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlEndpointDatasetAutoCompletionProvider.scala index 735fc48964..827088fd2a 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlEndpointDatasetAutoCompletionProvider.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlEndpointDatasetAutoCompletionProvider.scala @@ -24,7 +24,12 @@ case class SparqlEndpointDatasetAutoCompletionProvider() extends PluginParameter override def valueToLabel(value: String, dependOnParameterValues: Seq[ParamValue], workspace: WorkspaceReadTrait) (implicit context: PluginContext): Option[String] = { implicit val userContext: UserContext = context.user - val projectId = context.projectId.getOrElse(throw new ValidationException("Project not provided")) - workspace.project(projectId).taskOption[GenericDatasetSpec](value).flatMap(_.metaData.label) + if(value == "") { + // No endpoint selected + None + } else { + val projectId = context.projectId.getOrElse(throw new ValidationException("Project not provided")) + workspace.project(projectId).taskOption[GenericDatasetSpec](value).flatMap(_.metaData.label) + } } } From aa9c2bd4a987cfdd8c864f67959329816c6e1ce9 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 27 Apr 2026 11:16:30 +0200 Subject: [PATCH 53/63] Add general validation for SparqlTemplate using the variables that are available at creation time --- .../runtime/plugin/ParameterType.scala | 2 +- .../runtime/plugin/types/CodeParameters.scala | 12 ++++++++-- .../rdf/tasks/SparqlSelectCustomTask.scala | 3 +++ .../rdf/tasks/SparqlUpdateCustomTask.scala | 4 +++- 
.../templating/SparqlJinjaTemplate.scala | 22 +++---------------- .../templating/SparqlLegacyTemplate.scala | 9 ++++---- .../rdf/tasks/templating/SparqlTemplate.scala | 9 ++------ ...arqlUpdateTemplatingEngineSimpleTest.scala | 3 ++- .../templating/SparqlJinjaTemplateTest.scala | 22 ------------------- .../SparqlTemplateVelocityTest.scala | 3 ++- 10 files changed, 31 insertions(+), 58 deletions(-) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/plugin/ParameterType.scala b/silk-core/src/main/scala/org/silkframework/runtime/plugin/ParameterType.scala index 6dd95b21e5..f0c9c829a3 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/plugin/ParameterType.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/plugin/ParameterType.scala @@ -752,7 +752,7 @@ object StringParameterType { private object SparqlCodeParameterType extends CodeParameterType[SparqlCodeParameter] { override def codeMode: String = "sparql" - override def fromString(str: String)(implicit context: PluginContext): SparqlCodeParameter = SparqlCodeParameter(str) + override def fromString(str: String)(implicit context: PluginContext): SparqlCodeParameter = SparqlCodeParameter(str, Some(context.templateVariables)) } private object SqlCodeParameterType extends CodeParameterType[SqlCodeParameter] { diff --git a/silk-core/src/main/scala/org/silkframework/runtime/plugin/types/CodeParameters.scala b/silk-core/src/main/scala/org/silkframework/runtime/plugin/types/CodeParameters.scala index 7337295bf5..8d5f6a0b12 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/plugin/types/CodeParameters.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/plugin/types/CodeParameters.scala @@ -1,7 +1,9 @@ package org.silkframework.runtime.plugin.types -import scala.language.implicitConversions +import org.silkframework.runtime.templating.TemplateVariablesReader + import scala.collection.immutable.ArraySeq +import scala.language.implicitConversions sealed trait 
CodeParameter {
   def str: String
@@ -23,7 +25,13 @@ object Jinja2CodeParameter {
 
 case class JsonCodeParameter(var str: String) extends CodeParameter
 
-case class SparqlCodeParameter(var str: String) extends CodeParameter
+/**
+ * A SPARQL code parameter that might contain Jinja template variables.
+ *
+ * @param str The SPARQL query
+ * @param variables The variables that are available at creation time
+ */
+case class SparqlCodeParameter(var str: String, val variables: Option[TemplateVariablesReader] = None) extends CodeParameter
 
 object SparqlCodeParameter {
   implicit def str2parameter(str: String): SparqlCodeParameter = SparqlCodeParameter(str)
diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala
index 3326cdc39f..866d4ff69c 100644
--- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala
+++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala
@@ -85,6 +85,9 @@ case class SparqlSelectCustomTask(
   private val defaultScopePath: Seq[String] = defaultScope.split('.').map(_.trim).filter(_.nonEmpty).toSeq
 
   val queryTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, selectQuery.str, defaultScopePath)
+  for(variables <- selectQuery.variables) {
+    queryTemplate.validate(variables, None)
+  }
 
   def isStaticTemplate: Boolean = queryTemplate.isStaticTemplate
 
diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala
index 4fbe590b7c..91f127ff28 100644
--- 
a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -50,7 +50,9 @@ case class SparqlUpdateCustomTask( assert(batchSize >= 1, "Batch size must be greater zero!") val compiledTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, sparqlUpdateTemplate.str) - compiledTemplate.validateUpdateQuery(batchSize) + for(variables <- sparqlUpdateTemplate.variables) { + compiledTemplate.validate(variables, None) + } def isStaticTemplate: Boolean = compiledTemplate.isStaticTemplate diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala index 540be5466e..d37f5072ea 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala @@ -3,7 +3,7 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating import org.silkframework.entity.paths.{TypedPath, UntypedPath} import org.silkframework.entity.{Entity, EntitySchema, ValueType} import org.silkframework.execution.local.EmptyEntityTable -import org.silkframework.runtime.templating.{CompiledTemplate, TemplateEngines, TemplateVariableConversions, TemplateVariableName, TemplateVariableValue} +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateEngines, TemplateVariableConversions, TemplateVariableName, TemplateVariableValue, TemplateVariablesReader} import org.silkframework.runtime.validation.ValidationException import org.silkframework.util.Uri @@ -45,24 +45,8 @@ class 
SparqlJinjaTemplate(rawTemplate: String, defaultScope: Seq[String] = Seq.e Seq(writer.toString) } - override def generateWithDefaults(): String = { - // Seed every referenced variable with a URI-like default so that QueryFactory can parse the result. - val genericUri = "urn:generic:1" - val defaults = referencedVariables.distinct.map(v => new TemplateVariableValue(v.name, v.scope, Seq(genericUri))) - Try { - val writer = new StringWriter() - template.evaluate(defaults, writer) - writer.toString - } match { - case Success(query) => query - case Failure(exception) => - throw new ValidationException( - "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) - } - } - - override def validateUpdateQuery(batchSize: Int): Unit = { - SparqlTemplate.validateParseability(generateWithDefaults(), batchSize) + override def validate(variables: TemplateVariablesReader, batchSize: Option[Int]): Unit = { + // TODO } override def inputSchema: EntitySchema = { diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala index 3c9cf27846..0adaaf906a 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala @@ -5,7 +5,7 @@ import org.silkframework.entity.paths.{TypedPath, UntypedPath} import org.silkframework.entity.{Entity, EntitySchema, ValueType} import org.silkframework.execution.local.EmptyEntityTable import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlLegacyTemplate._ -import org.silkframework.runtime.templating.{CompiledTemplate, TemplateMethodUsage, TemplateVariableName, 
TemplateVariableValue} +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateMethodUsage, TemplateVariableName, TemplateVariableValue, TemplateVariablesReader} import org.silkframework.runtime.validation.ValidationException import org.silkframework.util.Uri @@ -48,7 +48,8 @@ class SparqlLegacyTemplate(template: CompiledTemplate) extends SparqlTemplate { writer.toString } - override def generateWithDefaults(): String = { + /** Renders the template with example values for every variable. Used to derive schemas and validate queries. */ + private def generateWithDefaults(): String = { val genericUri = "urn:generic:1" val entityVariables = entityVariableNames val assignments = entityVariables.map(_ -> genericUri).toMap @@ -63,10 +64,10 @@ class SparqlLegacyTemplate(template: CompiledTemplate) extends SparqlTemplate { } } - override def validateUpdateQuery(batchSize: Int): Unit = { + override def validate(variables: TemplateVariablesReader, batchSize: Option[Int]): Unit = { if (!usesRawUnsafe) { // Skipped for rawUnsafe templates: they can generate arbitrary SPARQL syntax so example-query validation is unreliable. 
- SparqlTemplate.validateParseability(generateWithDefaults(), batchSize) + SparqlTemplate.validateParseability(generateWithDefaults(), batchSize.getOrElse(1)) } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala index b338b73f1e..cc7ace988e 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -4,7 +4,7 @@ import org.apache.jena.update.UpdateFactory import org.silkframework.config.{Prefixes, Task, TaskSpec} import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.runtime.plugin.PluginContext -import org.silkframework.runtime.templating.{TemplateEngines, TemplateVariableValue} +import org.silkframework.runtime.templating.{TemplateEngines, TemplateVariableValue, TemplateVariablesReader} import org.silkframework.runtime.validation.ValidationException import scala.util.Try @@ -34,11 +34,8 @@ trait SparqlTemplate { taskProperties: TaskProperties, templateVariables: Seq[TemplateVariableValue] = Seq.empty): Iterable[String] - /** Renders the template with example values for every variable. Used to derive schemas and validate queries. */ - def generateWithDefaults(): String - /** Validates the template and, if batchSize > 1, that batching produces valid SPARQL. */ - def validateUpdateQuery(batchSize: Int): Unit + def validate(variables: TemplateVariablesReader, batchSize: Option[Int]): Unit /** Entity schema that the template expects on its input port. 
*/ def inputSchema: EntitySchema @@ -72,8 +69,6 @@ object SparqlTemplate { * Verifies that a rendered example query parses as SPARQL Update, and — when batchSize > 1 — that two * consecutive copies also parse (so batching in [[org.silkframework.plugins.dataset.rdf.executors.BatchSparqlUpdateEmitter]] * produces valid queries). - * - * Shared between [[SparqlJinjaTemplate]] and [[SparqlLegacyTemplate]]. */ private[templating] def validateParseability(query: String, batchSize: Int): Unit = { Try(UpdateFactory.create(query)).failed.toOption.foreach { parseError => diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala index 9997618aed..6dc6b2a85c 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala @@ -9,6 +9,7 @@ import org.silkframework.runtime.validation.ValidationException import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers import org.silkframework.config.{FixedNumberOfInputs, FixedSchemaPort} +import org.silkframework.runtime.templating.{InMemoryTemplateVariablesReader, TemplateVariables} class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { behavior of "SPARQL Update Simple Templating Engine" @@ -93,7 +94,7 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { def parse(sparqlUpdateTemplate: String, batchSize: Int = 2): Seq[SparqlUpdateTemplatePart] = { val compiled = SparqlSimpleTemplateEngine().compile(sparqlUpdateTemplate) - new SparqlLegacyTemplate(compiled).validateUpdateQuery(batchSize) + new 
SparqlLegacyTemplate(compiled).validate(InMemoryTemplateVariablesReader(TemplateVariables.empty, Set.empty), Some(batchSize)) compiled.sparqlUpdateTemplateParts } } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala index bd48d9d887..c9f2e2f493 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala @@ -139,28 +139,6 @@ class SparqlJinjaTemplateTest extends AnyFlatSpec with Matchers { } } - it should "validate Jinja SPARQL Update templates by parsing an example query" in { - val validTemplate = SparqlTemplate.create(JinjaTemplateEngine.id, - """INSERT DATA { <{{ input.entity.subject | validate_uri }}> "{{ input.entity.label | escape_literal }}" } ;""") - // Well-formed template with batch size > 1 must not throw. - validTemplate.validateUpdateQuery(batchSize = 2) - - // Template that forgets to wrap the URI variable with `<...>` produces unparseable SPARQL. - val unwrappedUri = SparqlTemplate.create(JinjaTemplateEngine.id, - """INSERT DATA { {{ input.entity.subject }} "x" } ;""") - intercept[ValidationException] { - unwrappedUri.validateUpdateQuery(batchSize = 1) - } - - // Template without a trailing `;` is fine for batchSize = 1 but fails for batchSize > 1. 
- val missingSemicolon = SparqlTemplate.create(JinjaTemplateEngine.id, - """INSERT DATA { <{{ input.entity.subject | validate_uri }}> "x" }""") - missingSemicolon.validateUpdateQuery(batchSize = 1) - intercept[ValidationException] { - missingSemicolon.validateUpdateQuery(batchSize = 2) - } - } - it should "alias input.entity variables to bare references when defaultScope = input.entity" in { val template = SparqlTemplate.create(JinjaTemplateEngine.id, """<{{ subject }}> "{{ input.entity.label }}"""", diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala index eddec735dc..a88dbc0c2b 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala @@ -7,6 +7,7 @@ import org.silkframework.plugins.templating.velocity.VelocityTemplateEngine import org.silkframework.runtime.validation.ValidationException import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers +import org.silkframework.runtime.templating.{InMemoryTemplateVariablesReader, TemplateVariables} import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { @@ -126,6 +127,6 @@ class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { } def validate(template: String, batchSize: Int = 2): Unit = { - new SparqlLegacyTemplate(VelocityTemplateEngine().compile(template)).validateUpdateQuery(batchSize) + new SparqlLegacyTemplate(VelocityTemplateEngine().compile(template)).validate(InMemoryTemplateVariablesReader(TemplateVariables.empty, Set.empty), 
Some(batchSize)) } } From 8e0e9220c546608c99be1ec9a61945a845d82921 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Mon, 27 Apr 2026 11:29:15 +0200 Subject: [PATCH 54/63] Add and document validation of Jinja SPARQL templates --- .../rdf/tasks/SparqlSelectCustomTask.md | 24 +++++++++++++-- .../rdf/tasks/SparqlUpdateCustomTask.md | 14 +++++++++ .../templating/SparqlJinjaTemplate.scala | 30 +++++++++++++++++-- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index cdee91e90f..0e02472ca8 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -83,9 +83,27 @@ All transformer plugins are also available as Jinja filters under their plugin i ### Output schema inference -The output schema (i.e. the result variables) is derived from the query via a heuristic on the raw template text, -without evaluating it. If the heuristic cannot determine any output variables (for example, when the `SELECT` -clause itself is produced by a Jinja expression), the output port is reported with an unknown schema instead. +The output schema is derived from the raw template by a heuristic, without rendering it. The heuristic takes +the projection between `SELECT` and the first `WHERE`, `FROM` or `{`, drops a leading `DISTINCT` / `REDUCED`, +and then: + +- For `SELECT *`, collects every distinct `?var` token in the query. +- Otherwise, collects each top-level `?var` and the trailing `AS ?alias` from parenthesised expressions + (e.g. `(COUNT(?s) AS ?count)` yields `count`). + +Each variable becomes a string-typed path. 
If no variables can be detected (e.g. the projection is produced by +a Jinja expression), the output port is reported with an unknown schema. + +### Validation + +At task creation, the Jinja template is checked against the available template variables: + +- Every `project.<...>` or `global.<...>` reference must resolve to a known variable, matched on the full + scoped name (so e.g. `project.metaData.label` is looked up at that exact scope). +- Every `input.<...>` or `output.<...>` reference must use `config` or `entity` as its second segment. + +Bare references are resolved through `defaultScope` before applying the same rules. The template is not +rendered and the resulting SPARQL is not parsed. ### Legacy template engines diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md index aebfefabc4..56ac93a01c 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md @@ -52,6 +52,20 @@ written in the template. The following filters are provided to render values saf All transformer plugins are also available as Jinja filters under their plugin id (for example `lowerCase`, `trim`, `urlEncode`). +### Validation + +At task creation, the template is checked against the available template variables. What is checked depends +on the selected templating mode: + +- `Jinja`: + - Every `project.<...>` or `global.<...>` reference must resolve to a known variable, matched on the full + scoped name (so e.g. `project.metaData.label` is looked up at that exact scope). + - Every `input.<...>` or `output.<...>` reference must use `config` or `entity` as its second segment. 
+ - The template is not rendered and the resulting SPARQL is not parsed. +- `Simple` / `Velocity Engine`: + - The template is rendered once with placeholder values and the result must parse as a SPARQL Update query. + - Templates that use `rawUnsafe` skip this parse check. + ### Example of the `Simple` mode (deprecated) ``` diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala index d37f5072ea..bea941c521 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala @@ -8,7 +8,6 @@ import org.silkframework.runtime.validation.ValidationException import org.silkframework.util.Uri import java.io.StringWriter -import scala.util.{Failure, Success, Try} /** * SPARQL template implementation for the Jinja engine. 
@@ -46,7 +45,30 @@ class SparqlJinjaTemplate(rawTemplate: String, defaultScope: Seq[String] = Seq.e } override def validate(variables: TemplateVariablesReader, batchSize: Option[Int]): Unit = { - // TODO + val available = variables.all.variables.map(v => (v.name, v.scope)).toSet + for (variable <- referencedVariables.distinct) { + val effectiveScope = if (variable.scope.isEmpty) defaultScope else variable.scope + validateReference(variable, effectiveScope, available) + } + } + + private def validateReference(variable: TemplateVariableName, + effectiveScope: Seq[String], + available: Set[(String, Seq[String])]): Unit = { + effectiveScope.headOption match { + case Some(top) if VARIABLE_SCOPES.contains(top) => + if (!available.contains((variable.name, effectiveScope))) { + throw new ValidationException(s"Unknown template variable '${variable.scopedName}'.") + } + case Some(top) if TASK_SCOPES.contains(top) => + val subSection = effectiveScope.lift(1).getOrElse("") + if (!TASK_SUB_SECTIONS.contains(subSection)) { + throw new ValidationException( + s"Invalid template variable '${variable.scopedName}'. " + + s"Only '$top.config.' and '$top.entity.' 
are valid.") + } + case _ => + } } override def inputSchema: EntitySchema = { @@ -111,4 +133,8 @@ object SparqlJinjaTemplate { private[templating] final val INPUT_CONFIG_SCOPE: Seq[String] = Seq("input", "config") private[templating] final val INPUT_ENTITY_SCOPE: Seq[String] = Seq("input", "entity") private[templating] final val OUTPUT_CONFIG_SCOPE: Seq[String] = Seq("output", "config") + + private final val VARIABLE_SCOPES: Set[String] = Set("project", "global") + private final val TASK_SCOPES: Set[String] = Set("input", "output") + private final val TASK_SUB_SECTIONS: Set[String] = Set("config", "entity") } From a17775b4ac7aa469c86c31a240d5c4e63eba8744 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 29 Apr 2026 11:20:28 +0200 Subject: [PATCH 55/63] SparqlSelectCustomTask.md: Update doc to include section on input schema inference. --- .../dataset/rdf/tasks/SparqlSelectCustomTask.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index 0e02472ca8..d20c3a9f4c 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -81,6 +81,19 @@ written in the template. The following filters are provided to render values saf All transformer plugins are also available as Jinja filters under their plugin id (for example `lowerCase`, `trim`, `urlEncode`). +### Input schema inference + +The input schema (the entity properties the task expects) is derived by scanning the raw template for +`input.entity.` references (or bare references resolved via `defaultScope`). 
This scan operates +on the template text before rendering, so SPARQL line comments (`# ...`) are **not** stripped: a +commented-out line such as + +```sparql +# {{ input.entity.property }} +``` + +will still cause `property` to appear in the inferred input schema. + ### Output schema inference The output schema is derived from the raw template by a heuristic, without rendering it. The heuristic takes From d8f90d5a08726b1961b5fb4366febdade3b076bb Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 29 Apr 2026 11:29:28 +0200 Subject: [PATCH 56/63] SparqlSelectCustomTask: Also show number of queries in report --- .../executors/LocalSparqlSelectExecutor.scala | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index 03b149940f..45f4a721f9 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -85,6 +85,7 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT val projected = Entity(entity.uri, values, expectedSchema) val queries = sparqlSelectTask.queryTemplate.generate(Some(projected), taskProperties, templateVariables) queries.iterator.flatMap { query => + executionReportUpdater.foreach(_.increaseQueryCounter()) LocalSparqlSelectIterator.executeSelect(sparqlEndpoint, query, selectLimit, Some(sparqlSelectTask.sparqlTimeout)).bindings } } else { @@ -112,6 +113,7 @@ class LocalSparqlSelectIterator(sparqlSelectTask: SparqlSelectCustomTask, val taskProperties = TaskProperties.create(inputTask, outputTask, pluginContext) val templateVariables = 
pluginContext.templateVariables.all.variables val query = sparqlSelectTask.queryTemplate.generate(None, taskProperties, templateVariables).head + executionReportUpdater.foreach(_.increaseQueryCounter()) val results = LocalSparqlSelectIterator.executeSelect(sparqlEndpoint, query, selectLimit, Some(sparqlSelectTask.sparqlTimeout)) val vars = LocalSparqlSelectIterator.getSparqlVars(sparqlSelectTask) LocalSparqlSelectIterator.createEntities(sparqlSelectTask, results.bindings, vars, executionReportUpdater) @@ -186,11 +188,22 @@ object LocalSparqlSelectIterator { case class SparqlSelectExecutionReportUpdater(task: Task[TaskSpec], context: ActivityContext[ExecutionReport]) extends ExecutionReportUpdater { - override def operationLabel: Option[String] = Some("generate queries") + private var queriesStarted = 0 + + def increaseQueryCounter(): Unit = { + queriesStarted += 1 + } override def entityLabelSingle: String = "Row" override def entityLabelPlural: String = "Rows" - override def minEntitiesBetweenUpdates: Int = 1 + override def entityProcessVerb: String = { + val queryWord = if (queriesStarted == 1) "query" else "queries" + s"processed ($queriesStarted $queryWord)" + } + + override def additionalFields(): Seq[(String, String)] = { + Seq("Queries" -> queriesStarted.toString).filter(_ => queriesStarted > 0) + } } \ No newline at end of file From dbe2a34b790c163166ee2b99b2842fef54a0446e Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 29 Apr 2026 12:22:28 +0200 Subject: [PATCH 57/63] SparqlSelectCustomTask: Fail if a referenced variable is not provided --- .../executors/LocalSparqlSelectExecutor.scala | 14 +++++--------- .../templating/SparqlJinjaTemplateTest.scala | 18 +++++++++++++++++- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala 
b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index 45f4a721f9..fd2b5c7f4c 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -81,15 +81,11 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT val bindings = input.entities.flatMap { entity => val values = expectedSchema.typedPaths.map(tp => entity.valueOfPath(tp.toUntypedPath)) - if (values.forall(_.nonEmpty)) { - val projected = Entity(entity.uri, values, expectedSchema) - val queries = sparqlSelectTask.queryTemplate.generate(Some(projected), taskProperties, templateVariables) - queries.iterator.flatMap { query => - executionReportUpdater.foreach(_.increaseQueryCounter()) - LocalSparqlSelectIterator.executeSelect(sparqlEndpoint, query, selectLimit, Some(sparqlSelectTask.sparqlTimeout)).bindings - } - } else { - Iterator.empty + val projected = Entity(entity.uri, values, expectedSchema) + val queries = sparqlSelectTask.queryTemplate.generate(Some(projected), taskProperties, templateVariables) + queries.iterator.flatMap { query => + executionReportUpdater.foreach(_.increaseQueryCounter()) + LocalSparqlSelectIterator.executeSelect(sparqlEndpoint, query, selectLimit, Some(sparqlSelectTask.sparqlTimeout)).bindings } } LocalSparqlSelectIterator.createEntities(sparqlSelectTask, bindings, vars, executionReportUpdater) diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala index c9f2e2f493..ab84b1f950 100644 --- 
a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala @@ -47,7 +47,7 @@ class SparqlJinjaTemplateTest extends AnyFlatSpec with Matchers { result must include("projectValue / globalValue") } - it should "reject old Jinja syntax (row, inputProperties, outputProperties)" in { + it should "reject old syntax (row, inputProperties, outputProperties)" in { intercept[UnboundVariablesException] { generate("""{{ row.uri("x") }}""", assignments = Map("x" -> "urn:a:b")) } @@ -57,6 +57,22 @@ class SparqlJinjaTemplateTest extends AnyFlatSpec with Matchers { } } + it should "fail if a referenced variable is not provided" in { + intercept[UnboundVariablesException] { + generate("""{{ project.missing }}""") + } + intercept[UnboundVariablesException] { + generate("""{{ global.missing }}""") + } + intercept[UnboundVariablesException] { + SparqlTemplate.create(JinjaTemplateEngine.id, """{{ input.entity.subject }}""") + .generate(None, TaskProperties(Map.empty, Map.empty)).head + } + intercept[UnboundVariablesException] { + generate("""{{ input.entity.existing }} {{ input.entity.missing }}""", assignments = Map("existing" -> "urn:x:1")) + } + } + it should "derive the input schema from input.entity.* references" in { val template = SparqlTemplate.create(JinjaTemplateEngine.id, """ From 00fe728b086c1b1ec4fec8e4b2f6650eba30a893 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 29 Apr 2026 13:45:25 +0200 Subject: [PATCH 58/63] SparqlSelectCustomTask: Record the correct original error --- .../executors/LocalSparqlSelectExecutor.scala | 48 ++++++------------- .../LocalSparqlSelectExecutorTest.scala | 6 ++- 2 files changed, 19 insertions(+), 35 deletions(-) diff --git 
a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index fd2b5c7f4c..a1b4b4872e 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -6,7 +6,7 @@ import org.silkframework.dataset.rdf.{RdfDataset, RdfNode, SparqlEndpoint, Sparq import org.silkframework.entity.Entity import org.silkframework.execution.local.{GenericEntityTable, LocalEntities, LocalExecution, LocalExecutor} import org.silkframework.execution.typed.SparqlEndpointEntitySchema -import org.silkframework.execution.{ExecutionReport, ExecutionReportUpdater, ExecutorOutput, TaskException} +import org.silkframework.execution.{ExecutionReport, ExecutionReportUpdater, ExecutorOutput, ReportingIterator, TaskException} import org.silkframework.plugins.dataset.rdf.DefaultRdfDataset import org.silkframework.plugins.dataset.rdf.tasks.SparqlSelectCustomTask import org.silkframework.plugins.dataset.rdf.tasks.templating.TaskProperties @@ -27,20 +27,21 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT context: ActivityContext[ExecutionReport]) (implicit pluginContext: PluginContext): Option[LocalEntities] = { val taskData = task.data + implicit val prefixes: Prefixes = pluginContext.prefixes implicit val executionReportUpdater: SparqlSelectExecutionReportUpdater = SparqlSelectExecutionReportUpdater(task, context) inputs match { case Seq(SparqlEndpointEntitySchema(sparql)) => val entities = executeOnSparqlEndpoint(taskData, sparql.task, output.task, executionReportUpdater = Some(executionReportUpdater)) - Some(GenericEntityTable(entities, entitySchema = 
taskData.outputSchema, task)) + Some(ReportingIterator.addReporter(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task))) case Seq() if taskData.useDefaultDataset => val rdfDataset = DefaultRdfDataset.resolve() val entities = executeOnDefaultDataset(taskData, rdfDataset, output.task, executionReportUpdater = Some(executionReportUpdater)) - Some(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task)) + Some(ReportingIterator.addReporter(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task))) case Seq(input) if taskData.useDefaultDataset => val rdfDataset = DefaultRdfDataset.resolve() - val entities = executeOnDefaultDatasetPerEntity(taskData, rdfDataset, input, output.task, executionReportUpdater = Some(executionReportUpdater)) - Some(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task)) + val entities = executeOnDefaultDatasetPerEntity(taskData, rdfDataset, input, output.task, executionReportUpdater) + Some(ReportingIterator.addReporter(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task))) case _ => throw TaskException("SPARQL select executor did not receive a SPARQL endpoint as requested!") } @@ -68,8 +69,8 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT rdfDataset: RdfDataset, input: LocalEntities, outputTask: Option[Task[_ <: TaskSpec]], - limit: Int = Integer.MAX_VALUE, - executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]) + executionReportUpdater: SparqlSelectExecutionReportUpdater, + limit: Int = Integer.MAX_VALUE) (implicit pluginContext: PluginContext): CloseableIterator[Entity] = { implicit val user: UserContext = pluginContext.user val sparqlEndpoint = rdfDataset.sparqlEndpoint @@ -84,11 +85,11 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT val projected = Entity(entity.uri, values, expectedSchema) val queries = sparqlSelectTask.queryTemplate.generate(Some(projected), 
taskProperties, templateVariables) queries.iterator.flatMap { query => - executionReportUpdater.foreach(_.increaseQueryCounter()) + executionReportUpdater.increaseQueryCounter() LocalSparqlSelectIterator.executeSelect(sparqlEndpoint, query, selectLimit, Some(sparqlSelectTask.sparqlTimeout)).bindings } } - LocalSparqlSelectIterator.createEntities(sparqlSelectTask, bindings, vars, executionReportUpdater) + LocalSparqlSelectIterator.createEntities(sparqlSelectTask, bindings, vars) } } @@ -112,7 +113,7 @@ class LocalSparqlSelectIterator(sparqlSelectTask: SparqlSelectCustomTask, executionReportUpdater.foreach(_.increaseQueryCounter()) val results = LocalSparqlSelectIterator.executeSelect(sparqlEndpoint, query, selectLimit, Some(sparqlSelectTask.sparqlTimeout)) val vars = LocalSparqlSelectIterator.getSparqlVars(sparqlSelectTask) - LocalSparqlSelectIterator.createEntities(sparqlSelectTask, results.bindings, vars, executionReportUpdater) + LocalSparqlSelectIterator.createEntities(sparqlSelectTask, results.bindings, vars) } } @@ -152,32 +153,13 @@ object LocalSparqlSelectIterator { def createEntities(taskData: SparqlSelectCustomTask, bindings: CloseableIterator[SortedMap[String, RdfNode]], - vars: IndexedSeq[String], - executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]): CloseableIterator[Entity] = { - implicit val prefixes: Prefixes = Prefixes.empty - var schemaReported = false - val increase: Entity => Unit = (entity: Entity) => executionReportUpdater match { - case Some(updater) => - if (!schemaReported) { - schemaReported = true - updater.startNewOutputSamples(entity.schema) - } - updater.addEntityAsSampleEntity(entity) - updater.increaseEntityCounter() - case None => // no-op - } - + vars: IndexedSeq[String]): CloseableIterator[Entity] = { var count = 0 - val entityIterator = bindings.map { binding => + bindings.map { binding => count += 1 - val values = vars map { v => - binding.get(v).toSeq.map(_.value) - } - val entity = 
Entity(DataSource.URN_NID_PREFIX + count, values = values, schema = taskData.outputSchema) - increase(entity) - entity + val values = vars.map(v => binding.get(v).toSeq.map(_.value)) + Entity(DataSource.URN_NID_PREFIX + count, values = values, schema = taskData.outputSchema) } - entityIterator.thenClose(() => executionReportUpdater.foreach(updater => updater.executionDone())) } } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala index 8ac78499a3..f8036d61db 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala @@ -3,16 +3,18 @@ package org.silkframework.plugins.dataset.rdf.executors import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers -import org.silkframework.config.{PlainTask, Task} +import org.silkframework.config.{PlainTask, Prefixes, Task} import org.silkframework.dataset.rdf._ import org.silkframework.dataset.{DataSource, DatasetSpec, EntitySink, LinkSink} import org.silkframework.entity.paths.TypedPath import org.silkframework.entity.{Entity, EntitySchema, ValueType} +import org.silkframework.execution.ReportingIterator import org.silkframework.execution.local.GenericEntityTable import org.silkframework.plugins.dataset.rdf.tasks.SparqlSelectCustomTask import org.silkframework.runtime.activity.{TestUserContextTrait, UserContext} import org.silkframework.runtime.iterator.{CloseableIterator, TraversableIterator} import org.silkframework.runtime.plugin.{ParameterValues, PluginContext} +import org.silkframework.runtime.templating.exceptions.UnboundVariablesException import 
org.silkframework.util.{MockitoSugar, TestMocks} import scala.collection.immutable.SortedMap @@ -87,7 +89,7 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec val reportUpdater = SparqlSelectExecutionReportUpdater(PlainTask("task", task), activityContextMock) val results = LocalSparqlSelectExecutor() - .executeOnDefaultDatasetPerEntity(task, stubDataset, inputTable, outputTask = None, executionReportUpdater = Some(reportUpdater)) + .executeOnDefaultDatasetPerEntity(task, stubDataset, inputTable, outputTask = None, executionReportUpdater = reportUpdater) .toList capturedQueries.toSeq must have size 2 From 242e45b47a5d32b44af27cc386092a981ab1f9c8 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 29 Apr 2026 13:50:51 +0200 Subject: [PATCH 59/63] Fixed LocalSparqlSelectExecutorTest --- .../LocalSparqlSelectExecutorTest.scala | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala index f8036d61db..daf70b9be2 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala @@ -80,8 +80,7 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec val inputSchema = EntitySchema("", typedPaths = IndexedSeq(TypedPath("s", ValueType.URI))) val inputEntities = Seq( Entity("urn:in:1", IndexedSeq(Seq("http://example.org/a")), inputSchema), - Entity("urn:in:2", IndexedSeq(Seq("http://example.org/b")), inputSchema), - Entity("urn:in:3", IndexedSeq(Seq()), inputSchema) // skipped: missing value + Entity("urn:in:2", IndexedSeq(Seq("http://example.org/b")), inputSchema) ) val 
inputTable = GenericEntityTable(inputEntities, inputSchema, PlainTask("inputTask", DatasetSpec(stubDataset))) @@ -99,6 +98,29 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec results.size mustBe (rowsPerQuery * 2) } + it should "fail when an input entity is missing a value referenced by the template" in { + val query = """SELECT ?p ?o WHERE { <{{ input.entity.s }}> ?p ?o }""" + val task = SparqlSelectCustomTask(query, useDefaultDataset = true) + + val sparqlEndpoint = sparqlEndpointStub() + val stubDataset = new StubRdfDataset(sparqlEndpoint) + + val inputSchema = EntitySchema("", typedPaths = IndexedSeq(TypedPath("s", ValueType.URI))) + val inputEntities = Seq( + Entity("urn:in:1", IndexedSeq(Seq()), inputSchema) + ) + val inputTable = GenericEntityTable(inputEntities, inputSchema, PlainTask("inputTask", DatasetSpec(stubDataset))) + + val activityContextMock = TestMocks.activityContextMock() + val reportUpdater = SparqlSelectExecutionReportUpdater(PlainTask("task", task), activityContextMock) + + an[UnboundVariablesException] must be thrownBy { + LocalSparqlSelectExecutor() + .executeOnDefaultDatasetPerEntity(task, stubDataset, inputTable, outputTask = None, executionReportUpdater = reportUpdater) + .toList + } + } + it should "evaluate a Jinja query template using the graph variable from the task parameters" in { val graphUri = "http://example.org/testGraph" val query = """SELECT * WHERE { GRAPH <{{ input.config.graph }}> { ?s ?p ?o } }""" From 70c739c3f67299994ab493c504b340490847f394 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 29 Apr 2026 13:56:20 +0200 Subject: [PATCH 60/63] LocalSparqlUpdateExecutor: Should fail if variables are missing. 
--- .../rdf/executors/LocalSparqlUpdateExecutor.scala | 4 ++-- .../rdf/LocalSparqlUpdateExecutorTest.scala | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala index aa8f4fcd41..01a579b4bd 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala @@ -35,8 +35,8 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT val inputProperties = getInputProperties(input.entitySchema).distinct val taskProperties = TaskProperties.create(Some(input.task), output.task, pluginContext) checkInputSchema(expectedProperties, inputProperties.toSet) - for (entity <- input.entities; - values = expectedSchema.typedPaths.map(tp => entity.valueOfPath(tp.toUntypedPath)) if values.forall(_.nonEmpty)) { + for (entity <- input.entities) { + val values = expectedSchema.typedPaths.map(tp => entity.valueOfPath(tp.toUntypedPath)) val projected = Entity(entity.uri, values, expectedSchema) for (query <- updateTask.compiledTemplate.generate(Some(projected), taskProperties, templateVariables)) { batchEmitter.update(query) diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala index 4beb2ae8a1..5bddfe8108 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala +++ 
b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala @@ -31,11 +31,8 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestW private val batchSize = 5 private val sparqlUpdateTemplate = s"""INSERT DATA { $${} $${"v"} } ;""" private val schema = EntitySchema("", typedPaths = IndexedSeq(TypedPath("s", ValueType.URI), TypedPath("v", ValueType.STRING))) - private val notIncluded = "NOT_INCLUDED" private val inputEntities: Seq[Entity] = Seq( Entity("http://example.org/entity/1", IndexedSeq(Seq("http://s1"), Seq("s1a", "s1b")), schema), - Entity("http://example.org/entity/2", IndexedSeq(Seq(s"http://$notIncluded"), Seq()), schema), - Entity("http://example.org/entity/3", IndexedSeq(Seq(), Seq(notIncluded)), schema), Entity("http://example.org/entity/4", IndexedSeq(Seq("http://s2a", "http://s2b"), Seq("s2a", "s2b", "s2c")), schema) ) private def mockInputTable(properties: Seq[(String, String)] = Seq.empty, @@ -79,6 +76,17 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestW samplesEntities.entities.head.values.head.head must startWith ("""INSERT DATA { "s1a" } ;""") } + it should "fail when an input entity is missing a value referenced by the template" in { + val incompleteEntities = Seq( + Entity("http://example.org/entity/incomplete", IndexedSeq(Seq("http://s1"), Seq()), schema) + ) + val inputTaskMock = PlainTask("mockTask", new DummyTaskSpec(Map.empty)) + val input = Seq(GenericEntityTable(incompleteEntities, schema, inputTaskMock)) + intercept[ValidationException] { + executeTask(sparqlUpdateTemplate, input).entities.toList + } + } + it should "throw validation exception if an invalid input schema is found" in { val invalidSchema = EntitySchema("", typedPaths = IndexedSeq("s", "wrong").map(UntypedPath(_).asUntypedValueType)) val input = Seq(mockInputTable(schema = invalidSchema)) From 9abdf76fb2c877c289f655ca027af5f5c25e3989 Mon Sep 17 
00:00:00 2001 From: Robert Isele Date: Wed, 29 Apr 2026 13:59:07 +0200 Subject: [PATCH 61/63] Fixed SparqlUpdateTaskIntegrationTest --- .../plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala index ad9863208e..804a65d179 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala @@ -53,7 +53,7 @@ class SparqlUpdateTaskIntegrationTest extends AnyFlatSpec with Matchers with Sin ) val sparqlSelectTaskReport = taskReports(4).report sparqlSelectTaskReport.entityCount mustBe 8 - sparqlSelectTaskReport.summary.filter(r => r._1 == "No. of rows processed").map(_._2) mustBe Seq("8") + sparqlSelectTaskReport.summary.filter(r => r._1.startsWith("No. of rows processed")).map(_._2) mustBe Seq("8") val sparqlUpdateTaskReport = taskReports(5).report // Batch size is set to 2, so half the number of the SPARQL Select task From efa22ae4a6f4189d6a33363cb46ee416c369d503 Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 29 Apr 2026 14:31:56 +0200 Subject: [PATCH 62/63] SPARQL tasks: Added action to show prefixes. 
--- .../org/silkframework/config/Prefixes.scala | 8 +++----- .../rdf/tasks/SparqlSelectCustomTask.scala | 16 +++++++++++++++- .../rdf/tasks/SparqlUpdateCustomTask.scala | 16 +++++++++++++++- 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/silk-core/src/main/scala/org/silkframework/config/Prefixes.scala b/silk-core/src/main/scala/org/silkframework/config/Prefixes.scala index 71c8a2fc11..be1f47a5bf 100644 --- a/silk-core/src/main/scala/org/silkframework/config/Prefixes.scala +++ b/silk-core/src/main/scala/org/silkframework/config/Prefixes.scala @@ -87,11 +87,9 @@ case class Prefixes(prefixMap: immutable.HashMap[String, String]) extends Serial } def toSparql: String = { - var sparql = "" - for ((key, value) <- prefixMap) { - sparql += "PREFIX " + key + ": <" + value + "> " - } - sparql + prefixMap.toSeq.sortBy(_._1).map { case (key, value) => + s"PREFIX $key: <$value>" + }.mkString("\n") } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index 866d4ff69c..c924d81e80 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -6,7 +6,8 @@ import org.silkframework.entity._ import org.silkframework.execution.typed.SparqlEndpointEntitySchema import org.silkframework.plugins.dataset.rdf.datasets.SparqlDataset import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlTemplate -import org.silkframework.runtime.plugin.annotations.{Param, Plugin, PluginReference} +import org.silkframework.runtime.plugin.PluginContext +import org.silkframework.runtime.plugin.annotations.{Action, Param, Plugin, PluginReference} import 
org.silkframework.runtime.plugin.types.SparqlCodeParameter import org.silkframework.runtime.templating.TemplateEngineAutocompletionProvider @@ -114,6 +115,19 @@ case class SparqlSelectCustomTask( } val outputSchema: EntitySchema = queryTemplate.outputSchema + + @Action( + label = "Show prefixes", + description = "Shows the available namespace prefixes as a SPARQL header that can be copied into the query." + ) + def showPrefixes(implicit pluginContext: PluginContext): String = { + val prefixes = pluginContext.prefixes + if (prefixes.prefixMap.isEmpty) { + "No prefixes are defined." + } else { + "```sparql\n" + prefixes.toSparql + "\n```" + } + } } object SparqlSelectCustomTask { diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index 91f127ff28..323f855eec 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -5,7 +5,8 @@ import org.silkframework.entity._ import org.silkframework.execution.typed.SparqlUpdateEntitySchema import org.silkframework.plugins.dataset.rdf.tasks.templating._ import org.silkframework.plugins.dataset.rdf.datasets.SparqlDataset -import org.silkframework.runtime.plugin.annotations.{Param, Plugin, PluginReference} +import org.silkframework.runtime.plugin.PluginContext +import org.silkframework.runtime.plugin.annotations.{Action, Param, Plugin, PluginReference} import org.silkframework.runtime.plugin.types.SparqlCodeParameter import org.silkframework.runtime.templating.{TemplateEngineAutocompletionProvider, TemplateEngines} @@ -67,6 +68,19 @@ case class SparqlUpdateCustomTask( } override def outputPort: Option[Port] = 
Some(FixedSchemaPort(SparqlUpdateEntitySchema.schema)) + + @Action( + label = "Show prefixes", + description = "Shows the available namespace prefixes as a SPARQL header that can be copied into the query." + ) + def showPrefixes(implicit pluginContext: PluginContext): String = { + val prefixes = pluginContext.prefixes + if (prefixes.prefixMap.isEmpty) { + "No prefixes are defined." + } else { + "```sparql\n" + prefixes.toSparql + "\n```" + } + } } object SparqlUpdateCustomTask { From e10131a74212dd552aa9c3ffef7d2e43e0fa250b Mon Sep 17 00:00:00 2001 From: Robert Isele Date: Wed, 29 Apr 2026 14:47:09 +0200 Subject: [PATCH 63/63] SPARQL tasks: Improved and shortened error message. --- .../templating/jinja/JinjaTemplateEngine.scala | 12 +++++++++++- .../transformer/sparql/ValidateUriTransformer.scala | 2 +- .../activity/workflow/LocalWorkflowExecutor.scala | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala index a98bc5a16e..695c638c24 100644 --- a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala @@ -141,11 +141,21 @@ class JinjaTemplate(val node: Node) extends CompiledTemplate { // For now, we just throw any errors. In the future, we could improve this and add an error collector. 
if (!interpreter.getErrors.isEmpty) { - val msg = "Errors in template: " + interpreter.getErrors.asScala.map(_.getMessage).mkString(" ") + val messages = interpreter.getErrors.asScala.map { error => + Option(error.getException).map(rootCauseMessage).getOrElse(error.getMessage) + } + val prefix = if (messages.size == 1) "Error in template: " else "Errors in template: " + val msg = prefix + messages.mkString(" ") val cause = Option(interpreter.getErrors.get(0).getException) throw new TemplateEvaluationException(msg, cause) } } + private def rootCauseMessage(throwable: Throwable): String = { + Option(throwable.getCause) match { + case Some(cause) if cause ne throwable => rootCauseMessage(cause) + case _ => throwable.getMessage + } + } } diff --git a/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformer.scala b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformer.scala index 2557d8e294..e4dce73bbd 100644 --- a/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformer.scala +++ b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformer.scala @@ -39,7 +39,7 @@ case class ValidateUriTransformer() extends SimpleTransformer { Try(new URI(value)) match { case scala.util.Success(uri) if uri.isAbsolute => value case _ => - throw new ValidationException(s"Value is not a valid absolute IRI: '$value'") + throw new ValidationException(s"'$value' is not a valid absolute IRI") } } } \ No newline at end of file diff --git a/silk-workspace/src/main/scala/org/silkframework/workspace/activity/workflow/LocalWorkflowExecutor.scala b/silk-workspace/src/main/scala/org/silkframework/workspace/activity/workflow/LocalWorkflowExecutor.scala index 00becbbea3..67764dfb44 100644 --- a/silk-workspace/src/main/scala/org/silkframework/workspace/activity/workflow/LocalWorkflowExecutor.scala +++ 
b/silk-workspace/src/main/scala/org/silkframework/workspace/activity/workflow/LocalWorkflowExecutor.scala @@ -375,7 +375,7 @@ case class LocalWorkflowExecutor(workflowTask: ProjectTask[Workflow], () } catch { case NonFatal(ex) => - throw WorkflowExecutionException(s"Exception occurred while writing to dataset '${resolvedDataset.label()}'. Cause: " + ex.getMessage, Some(ex)) + throw WorkflowExecutionException(s"Failed to write to dataset '${resolvedDataset.label()}': " + ex.getMessage, Some(ex)) } }