diff --git a/build.sbt b/build.sbt index 5b6abb8aec..44b15753e7 100644 --- a/build.sbt +++ b/build.sbt @@ -151,13 +151,29 @@ lazy val workspace = (project in file("silk-workspace")) // Plugins ////////////////////////////////////////////////////////////////////////////// +lazy val pluginsTemplatingJinja = (project in file("silk-plugins/silk-plugins-templating-jinja")) + .dependsOn(rules % "compile->compile;test->test") + .settings(commonSettings *) + .settings( + name := "Silk Plugins Templating Jinja", + libraryDependencies += "com.hubspot.jinjava" % "jinjava" % "2.8.3" + ) + +lazy val pluginsTemplatingVelocity = (project in file("silk-plugins/silk-plugins-templating-velocity")) + .dependsOn(rules % "compile->compile;test->test") + .settings(commonSettings *) + .settings( + name := "Silk Plugins Templating Velocity", + libraryDependencies += "org.apache.velocity" % "velocity-engine-core" % "2.4.1" + ) + lazy val pluginsRdf = (project in file("silk-plugins/silk-plugins-rdf")) - .dependsOn(rules, workspace % "test->test;compile->compile", core % "test->test;compile->compile", pluginsCsv % "test->compile") + .dependsOn(rules, workspace % "test->test;compile->compile", core % "test->test;compile->compile", pluginsCsv % "test->compile", + pluginsTemplatingJinja % "test->compile", pluginsTemplatingVelocity % "test->compile") .settings(commonSettings *) .settings( name := "Silk Plugins RDF", - libraryDependencies += "org.apache.jena" % "jena-fuseki-main" % "5.6.0" % "test", - libraryDependencies += "org.apache.velocity" % "velocity-engine-core" % "2.4.1" + libraryDependencies += "org.apache.jena" % "jena-fuseki-main" % "5.6.0" % "test" ) lazy val pluginsCsv = (project in file("silk-plugins/silk-plugins-csv")) @@ -226,8 +242,8 @@ lazy val persistentCaching = (project in file("silk-plugins/silk-persistent-cach // Aggregate all plugins lazy val plugins = (project in file("silk-plugins")) - .dependsOn(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, 
serializationJson, persistentCaching) - .aggregate(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, serializationJson, persistentCaching) + .dependsOn(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, serializationJson, persistentCaching, pluginsTemplatingJinja, pluginsTemplatingVelocity) + .aggregate(pluginsRdf, pluginsCsv, pluginsXml, pluginsJson, pluginsAsian, serializationJson, persistentCaching, pluginsTemplatingJinja, pluginsTemplatingVelocity) .settings(commonSettings *) .settings( name := "Silk Plugins" @@ -369,7 +385,7 @@ lazy val workbenchCore = (project in file("silk-workbench/silk-workbench-core")) lazy val workbenchWorkspace = (project in file("silk-workbench/silk-workbench-workspace")) .enablePlugins(PlayScala) - .dependsOn(workbenchCore % "compile->compile;test->test", pluginsRdf, pluginsCsv % "test->compile", pluginsXml % "test->compile") + .dependsOn(workbenchCore % "compile->compile;test->test", pluginsRdf, pluginsCsv % "test->compile", pluginsXml % "test->compile", pluginsTemplatingJinja % "test->compile") .aggregate(workbenchCore) .settings(commonSettings *) .settings( diff --git a/libs/gui-elements b/libs/gui-elements index 47cdaefc57..64359f57da 160000 --- a/libs/gui-elements +++ b/libs/gui-elements @@ -1 +1 @@ -Subproject commit 47cdaefc57ccd76705366d7831227e6fe03c8302 +Subproject commit 64359f57dad3410bef34cc87d2f1e8a7cd7b8708 diff --git a/silk-core/src/main/scala/org/silkframework/config/Prefixes.scala b/silk-core/src/main/scala/org/silkframework/config/Prefixes.scala index 71c8a2fc11..be1f47a5bf 100644 --- a/silk-core/src/main/scala/org/silkframework/config/Prefixes.scala +++ b/silk-core/src/main/scala/org/silkframework/config/Prefixes.scala @@ -87,11 +87,9 @@ case class Prefixes(prefixMap: immutable.HashMap[String, String]) extends Serial } def toSparql: String = { - var sparql = "" - for ((key, value) <- prefixMap) { - sparql += "PREFIX " + key + ": <" + value + "> " - } - sparql + 
prefixMap.toSeq.sortBy(_._1).map { case (key, value) => + s"PREFIX $key: <$value>" + }.mkString("\n") } } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/plugin/ParameterType.scala b/silk-core/src/main/scala/org/silkframework/runtime/plugin/ParameterType.scala index 6dd95b21e5..f0c9c829a3 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/plugin/ParameterType.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/plugin/ParameterType.scala @@ -752,7 +752,7 @@ object StringParameterType { private object SparqlCodeParameterType extends CodeParameterType[SparqlCodeParameter] { override def codeMode: String = "sparql" - override def fromString(str: String)(implicit context: PluginContext): SparqlCodeParameter = SparqlCodeParameter(str) + override def fromString(str: String)(implicit context: PluginContext): SparqlCodeParameter = SparqlCodeParameter(str, Some(context.templateVariables)) } private object SqlCodeParameterType extends CodeParameterType[SqlCodeParameter] { diff --git a/silk-core/src/main/scala/org/silkframework/runtime/plugin/types/CodeParameters.scala b/silk-core/src/main/scala/org/silkframework/runtime/plugin/types/CodeParameters.scala index 7337295bf5..8d5f6a0b12 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/plugin/types/CodeParameters.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/plugin/types/CodeParameters.scala @@ -1,7 +1,9 @@ package org.silkframework.runtime.plugin.types -import scala.language.implicitConversions +import org.silkframework.runtime.templating.TemplateVariablesReader + import scala.collection.immutable.ArraySeq +import scala.language.implicitConversions sealed trait CodeParameter { def str: String @@ -23,7 +25,13 @@ object Jinja2CodeParameter { case class JsonCodeParameter(var str: String) extends CodeParameter -case class SparqlCodeParameter(var str: String) extends CodeParameter +/** + * A SPARQL code parameter that might contain Jinja template variables. 
+ * + * @param str The SPARQL query + * @param variables The variables that are available at creation time + */ +case class SparqlCodeParameter(var str: String, val variables: Option[TemplateVariablesReader] = None) extends CodeParameter object SparqlCodeParameter { implicit def str2parameter(str: String): SparqlCodeParameter = SparqlCodeParameter(str) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/CombinedTemplateVariablesReader.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/CombinedTemplateVariablesReader.scala index a19f3f105c..872e17761a 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/CombinedTemplateVariablesReader.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/CombinedTemplateVariablesReader.scala @@ -6,7 +6,7 @@ case class CombinedTemplateVariablesReader(readers: Seq[TemplateVariablesReader] /** * The available variable scopes. */ - override def scopes: Set[String] = { + override def scopes: Set[Seq[String]] = { readers.flatMap(_.scopes).toSet } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/GlobalTemplateVariablesConfig.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/GlobalTemplateVariablesConfig.scala index 13edc6c9b1..c24ca7da38 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/GlobalTemplateVariablesConfig.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/GlobalTemplateVariablesConfig.scala @@ -75,7 +75,7 @@ object GlobalTemplateVariables extends TemplateVariablesReader with Serializable /** * The available variable scopes. */ - override def scopes: Set[String] = Set(TemplateVariableScopes.global) + override def scopes: Set[Seq[String]] = Set(TemplateVariableScopes.global) /** * Retrieves all template variables. 
diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/InMemoryTemplateVariablesReader.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/InMemoryTemplateVariablesReader.scala index 399d124fae..b6e700e179 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/InMemoryTemplateVariablesReader.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/InMemoryTemplateVariablesReader.scala @@ -1,3 +1,3 @@ package org.silkframework.runtime.templating -case class InMemoryTemplateVariablesReader(override val all: TemplateVariables, override val scopes: Set[String]) extends TemplateVariablesReader +case class InMemoryTemplateVariablesReader(override val all: TemplateVariables, override val scopes: Set[Seq[String]]) extends TemplateVariablesReader diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala index 0d6c20185f..a9a1804912 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngine.scala @@ -30,6 +30,14 @@ trait CompiledTemplate { */ def variables: Option[Seq[TemplateVariableName]] = None + /** + * Returns all method usages on a given variable in the template. + * Each usage contains the method name and its string parameter value. + * Only methods with a single string constant parameter are returned. + * Returns an empty sequence by default if not supported by the template engine. + */ + def methodUsages(variableName: String): Seq[TemplateMethodUsage] = Seq.empty + /** * Evaluates this template using a map of variable values. */ @@ -43,40 +51,19 @@ trait CompiledTemplate { def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig = EvaluationConfig()): Unit /** - * Evaluates this template using a provided entity. 
- * - * @throws TemplateEvaluationException If the evaluation failed. - */ - def evaluate(entity: Entity, writer: Writer): Unit = { - evaluate(entityToMap(entity), writer) - } - - /** - * Converts an entity to a sequence of template variables. - */ - protected def entityToMap(entity: Entity): Seq[TemplateVariableValue] = { - for((path, value) <- entity.schema.typedPaths zip entity.values if value.nonEmpty) yield { - new TemplateVariableValue(path.normalizedSerialization, "", value) - } - } - - /** - * Converts template values to a Java Map + * Converts template variable values to a nested Java-compatible map. + * Variables with an empty scope are placed at the top level. + * Variables with a scope are placed in nested maps corresponding to each scope element, + * e.g., scope Seq("project", "meta") produces Map("project" -> Map("meta" -> Map(name -> value))). */ protected def convertValues(value: Seq[TemplateVariableValue]): Map[String, AnyRef] = { - value.groupBy(_.scope).flatMap { case (scope, values) => - if (scope.isEmpty) { - for (value <- values) yield { - (value.name, IterableTemplateValues.fromValues(value.values)) - } - } else { - val nestedValues = - for (value <- values) yield { - (value.name, IterableTemplateValues.fromValues(value.values)) - } - Seq((scope, nestedValues.toMap.asJava)) - } + val (flatVars, scopedVars) = value.partition(_.scope.isEmpty) + val flatEntries = flatVars.map(v => v.name -> IterableTemplateValues.fromValues(v.values).asInstanceOf[AnyRef]) + val scopedEntries = scopedVars.groupBy(_.scope.head).map { case (topScope, vars) => + val shallowVars = vars.map(v => new TemplateVariableValue(v.name, v.scope.tail, v.values)) + topScope -> convertValues(shallowVars).asJava.asInstanceOf[AnyRef] } + (flatEntries ++ scopedEntries).toMap } } @@ -85,4 +72,12 @@ trait CompiledTemplate { * @param ignoreUnboundVariables If an unbound variable is found then instead of throwing an error the variable evaluates * to the variable name itself. 
*/ -case class EvaluationConfig(ignoreUnboundVariables: Boolean = false) \ No newline at end of file +case class EvaluationConfig(ignoreUnboundVariables: Boolean = false) + +/** + * Represents a method invocation on a template variable with a single string parameter. + * + * @param methodName The name of the invoked method. + * @param parameterValue The string constant passed as parameter. + */ +case class TemplateMethodUsage(methodName: String, parameterValue: String) \ No newline at end of file diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngineAutocompletionProvider.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngineAutocompletionProvider.scala index cdeb12e516..31859edd83 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngineAutocompletionProvider.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngineAutocompletionProvider.scala @@ -12,10 +12,10 @@ class TemplateEngineAutocompletionProvider extends PluginParameterAutoCompletion (implicit context: PluginContext): Iterable[AutoCompletionResult] = { val multiSearchWords = extractSearchTerms(searchQuery) TemplateEngines.availableEngines - .filter(_ != DisabledTemplateEngine.id) // Disabled template engine should not be suggested to the user - .filter(_ != UnresolvedTemplateEngine.id) // Unresolved template engine should not be suggested to the user - .filter(r => matchesSearchTerm(multiSearchWords, r.toLowerCase)) - .map(r => AutoCompletionResult(r, None)) + .filter(_.id.toString != DisabledTemplateEngine.id) // Disabled template engine should not be suggested to the user + .filter(_.id.toString != UnresolvedTemplateEngine.id) // Unresolved template engine should not be suggested to the user + .filter(engine => matchesSearchTerm(multiSearchWords, engine.id.toLowerCase)) + .map(engine => AutoCompletionResult(engine.id, Some(engine.label))) } /** Returns the label if exists for 
the given auto-completion value. This is needed if a value should diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala index 78c2830d55..c5fc511df0 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateEngines.scala @@ -1,6 +1,6 @@ package org.silkframework.runtime.templating -import org.silkframework.runtime.plugin.{PluginContext, PluginRegistry} +import org.silkframework.runtime.plugin.{PluginContext, PluginDescription, PluginRegistry} /** * Manages available template engines. @@ -10,8 +10,8 @@ object TemplateEngines { /** * Returns a list of all available template engines. */ - def availableEngines: Set[String] = { - PluginRegistry.availablePlugins[TemplateEngine].map(_.id.toString).toSet + def availableEngines: Seq[PluginDescription[TemplateEngine]] = { + PluginRegistry.availablePlugins[TemplateEngine] } /** @@ -21,7 +21,7 @@ object TemplateEngines { */ def create(id: String): TemplateEngine = { implicit val pluginContext: PluginContext = PluginContext.empty - PluginRegistry.create[TemplateEngine](id.toLowerCase) + PluginRegistry.create[TemplateEngine](id) } } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala index 78a60f6566..501dd070a2 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariable.scala @@ -7,13 +7,22 @@ import scala.xml.{Node, PCData} /** * A single template variable. + * + * @param name The local name of the variable. + * @param value The value of the variable. + * @param template Optional template expression to compute the value dynamically. 
+ * @param description Optional description for documentation. + * @param isSensitive True if the variable value should not be exposed to users. + * @param scope The scope as a sequence of strings forming a prefix path. May be empty. + * For example, a variable with name "label" and scope Seq("project", "metaData") + * is addressed as "project.metaData.label". */ case class TemplateVariable(override val name: String, value: String, template: Option[String] = None, description: Option[String] = None, isSensitive: Boolean = false, - override val scope: String) extends TemplateVariableValue(name, scope, values = Seq(value)) { + override val scope: Seq[String] = Seq.empty) extends TemplateVariableValue(name, scope, values = Seq(value)) { validate() @@ -49,14 +58,14 @@ object TemplateVariable { template = Option((value \ "Template").text).filter(_.trim.nonEmpty), description = Option((value \ "Description").text).filter(_.trim.nonEmpty), isSensitive = (value \ "@isSensitive").text.toBoolean, - scope = (value \ "@scope").text, + scope = (value \ "@scope").text.split('.').filter(_.nonEmpty).toSeq, ) } override def write(value: TemplateVariable)(implicit writeContext: WriteContext[Node]): Node = { + scope={value.scope.mkString(".")}> {PCData(value.value)} { value.template.toSeq.map(template => ) } {value.description.getOrElse("")} diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala new file mode 100644 index 0000000000..36833baba2 --- /dev/null +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableConversions.scala @@ -0,0 +1,44 @@ +package org.silkframework.runtime.templating + +import org.silkframework.config.{Prefixes, Task, TaskSpec} +import org.silkframework.entity.Entity +import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, SimpleParameterValue} + +object 
TemplateVariableConversions { + + /** + * Converts an entity to a sequence of template variables. + * + * @param entity The entity to convert. + * @param scope The scope to assign to all resulting variables. + */ + def fromEntity(entity: Entity, scope: Seq[String] = Seq.empty): Seq[TemplateVariableValue] = { + for((path, value) <- entity.schema.typedPaths zip entity.values if value.nonEmpty) yield { + new TemplateVariableValue(path.normalizedSerialization, scope, value) + } + } + + /** + * Converts a task's parameters to a sequence of template variables. + * Nested plugin parameters are placed into nested scopes using the parameter key. + * + * @param task The task whose parameters to convert. + * @param scope The base scope. Nested parameters extend this scope with the parameter key. + */ + def fromTask(task: Task[_ <: TaskSpec], scope: Seq[String] = Seq("task"))(implicit pluginContext: PluginContext): Seq[TemplateVariableValue] = { + fromPluginParameters(task.data.parameters, scope) + } + + private def fromPluginParameters(values: ParameterValues, scope: Seq[String] = Seq.empty)(implicit pluginContext: PluginContext): Seq[TemplateVariableValue] = { + for((key, value) <- values.values) yield { + value match { + case sv: SimpleParameterValue => + Seq(new TemplateVariableValue(key, scope, Seq(sv.strValue))) + case nested: ParameterValues => + fromPluginParameters(nested, scope :+ key) + case _ => + Seq.empty + } + } + }.flatten.toSeq +} diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala index 98f6c92dbf..ff71d6d23b 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableName.scala @@ -4,19 +4,18 @@ package org.silkframework.runtime.templating * Holds the full name of a template variable including 
it's scope. * * @param name The local name of the variable. - * @param scope The scope. May be empty. + * @param scope The scope as a sequence of strings forming a prefix path. May be empty. + * For example, a variable with name "label" and scope Seq("project", "metaData") + * is addressed as "project.metaData.label". */ -class TemplateVariableName(val name: String, val scope: String) { +class TemplateVariableName(val name: String, val scope: Seq[String] = Seq.empty) { /** - * The variable name including its scope, e.g., `project.var` + * The variable name including its scope as a dot-separated string, e.g., `project.var` or `project.metaData.var`. + * If the scope is empty, this is just the local name. */ def scopedName: String = { - if (scope.nonEmpty) { - scope + "." + name - } else { - name - } + (scope :+ name).mkString(".") } override def toString: String = { @@ -36,12 +35,17 @@ class TemplateVariableName(val name: String, val scope: String) { object TemplateVariableName { + /** + * Parses a dot-separated full variable name into a [[TemplateVariableName]]. + * All segments except the last form the scope; the last segment is the local name. + * For example, "project.metaData.label" parses to name="label", scope=Seq("project","metaData"). 
+ */ def parse(fullName: String): TemplateVariableName = { - val pointIndex = fullName.indexOf('.'.toInt) - if(pointIndex != -1) { - new TemplateVariableName(fullName.substring(pointIndex + 1), fullName.substring(0, pointIndex)) + val parts = fullName.split('.') + if (parts.length > 1) { + new TemplateVariableName(parts.last, parts.dropRight(1).toSeq) } else { - new TemplateVariableName(fullName, "") + new TemplateVariableName(fullName, Seq.empty) } } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala index 62c016322f..15fe4ca42c 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableScopes.scala @@ -1,15 +1,19 @@ package org.silkframework.runtime.templating +/** + * Predefined variable scopes. Each scope is a sequence of strings that forms a prefix path + * used to address variables, e.g., a variable "label" in scope Seq("project") is addressed as "project.label". + */ object TemplateVariableScopes { /** - * Global variables. + * Scope for global variables, addressed as "global.variableName". */ - final val global = "global" + final val global: Seq[String] = Seq("global") /** - * Project variables. + * Scope for project variables, addressed as "project.variableName". 
*/ - final val project = "project" + final val project: Seq[String] = Seq("project") } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala index 605eb94485..13fa368451 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariableValue.scala @@ -4,10 +4,10 @@ package org.silkframework.runtime.templating * Holds the full name and value of a template variable. * * @param name The local name of the variable. - * @param scope The scope. May be empty. + * @param scope The scope as a sequence of strings forming a prefix path. May be empty. * @param values The values for this variable. */ -class TemplateVariableValue(name: String, scope: String, val values: Seq[String]) extends TemplateVariableName(name, scope) { +class TemplateVariableValue(name: String, scope: Seq[String] = Seq.empty, val values: Seq[String]) extends TemplateVariableName(name, scope) { def asName: TemplateVariableName = { new TemplateVariableName(name, scope) diff --git a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala index e7aa0ce863..0dce817eb5 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/TemplateVariablesReader.scala @@ -9,9 +9,10 @@ import org.silkframework.runtime.validation.NotFoundException trait TemplateVariablesReader { /** - * The available variable scopes. + * The available variable scopes. Each scope is represented as a sequence of strings forming a prefix path, + * e.g., Seq("project") or Seq("project", "metaData"). 
*/ - def scopes: Set[String] + def scopes: Set[Seq[String]] /** * Retrieves all template variables. diff --git a/silk-workbench/silk-workbench-core/app/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala b/silk-core/src/main/scala/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala similarity index 54% rename from silk-workbench/silk-workbench-core/app/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala rename to silk-core/src/main/scala/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala index 645bd258a3..3a2e6c5db2 100644 --- a/silk-workbench/silk-workbench-core/app/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/templating/exceptions/UnboundVariablesException.scala @@ -1,29 +1,24 @@ package org.silkframework.runtime.templating.exceptions import org.silkframework.runtime.templating.TemplateVariableName -import org.silkframework.runtime.templating.exceptions.UnboundVariablesException.generateMessage -import org.silkframework.workbench.utils.JsonRequestException -import play.api.libs.json.{JsObject, Json} /** * Thrown if a value for an unbound variable is missing. */ class UnboundVariablesException(val missingVars: Seq[TemplateVariableName], cause: Option[Exception] = None) - extends TemplateEvaluationException(generateMessage(missingVars), cause) with JsonRequestException { + extends TemplateEvaluationException(UnboundVariablesException.generateMessage(missingVars), cause) { /** * A short description of the error type. */ override def errorTitle: String = "Unbound variables" + /** - * Json that will be included in addition to the HTTP Problem details JSON. - * Note that using reserved HTTP Problem details fields (type, title, detail) would overwrite the generated ones. 
- */ - override def additionalJson: JsObject = { - Json.obj( - "unboundVariables" -> missingVars.map(_.scopedName) - ) + * Include the unbound variables in the HTTP Problem details JSON. + */ + override def additionalData: Map[String, Seq[String]] = { + Map("unboundVariables" -> missingVars.map(_.scopedName)) } } diff --git a/silk-core/src/main/scala/org/silkframework/runtime/validation/RequestException.scala b/silk-core/src/main/scala/org/silkframework/runtime/validation/RequestException.scala index 15c9ac3039..e432381a7b 100644 --- a/silk-core/src/main/scala/org/silkframework/runtime/validation/RequestException.scala +++ b/silk-core/src/main/scala/org/silkframework/runtime/validation/RequestException.scala @@ -21,4 +21,9 @@ abstract class RequestException(msg: String, cause: Option[Throwable]) extends R */ def httpErrorCode: Option[Int] + /** + * Additional key-value pairs that should be included in the error response. + */ + def additionalData: Map[String, Seq[String]] = Map.empty + } diff --git a/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableNameTest.scala b/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableNameTest.scala index bde9114c2d..ca802c2bc7 100644 --- a/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableNameTest.scala +++ b/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableNameTest.scala @@ -8,9 +8,9 @@ class TemplateVariableNameTest extends AnyFlatSpec with Matchers { behavior of "TemplateVariableName" it should "parse full names" in { - TemplateVariableName.parse("project.var") shouldBe new TemplateVariableName("var", "project") - TemplateVariableName.parse("var") shouldBe new TemplateVariableName("var", "") - TemplateVariableName.parse("a.b.c") shouldBe new TemplateVariableName("b.c", "a") + TemplateVariableName.parse("project.var") shouldBe new TemplateVariableName("var", Seq("project")) + TemplateVariableName.parse("var") shouldBe 
new TemplateVariableName("var", Seq.empty) + TemplateVariableName.parse("a.b.c") shouldBe new TemplateVariableName("c", Seq("a", "b")) } } diff --git a/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableTest.scala b/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableTest.scala index 6f6e9a88a5..3974901af2 100644 --- a/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableTest.scala +++ b/silk-core/src/test/scala/org/silkframework/runtime/templating/TemplateVariableTest.scala @@ -19,6 +19,6 @@ class TemplateVariableTest extends AnyFlatSpec with Matchers { } } - private def variableName(name: String) = TemplateVariable(name, "test value", None, None, isSensitive = false, "testScope") + private def variableName(name: String) = TemplateVariable(name, "test value", None, None, isSensitive = false, Seq("testScope")) } diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md index 2bcc743b27..d20c3a9f4c 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.md @@ -1,22 +1,125 @@ -The SPARQL SELECT plugin is a task for executing SPARQL SELECT queries on the input RDF data source. +The SPARQL SELECT plugin is a task for executing SPARQL SELECT queries on an RDF data source. +It can be used in a workflow, connecting an input to an output. A +[SPARQL 1.1 SELECT](https://www.w3.org/TR/sparql11-query/#select) query is supported; the simplest example is +`SELECT * WHERE { ?s ?p ?o }`. -## Description +## Input and output -The SPARQL Select query plugin is an example of a _RDF task_ or _operator_. 
Such a task can be used in a workflow, -connecting an input to an output. In this specific case, the _input_ is — in essence — a _SPARQL endpoint_ and the -_output_ is the entity table containing the _SPARQL results_ of the SPARQL SELECT query execution. +The _input_ depends on the configuration: -In general terms, a [SPARQL 1.1 SELECT](https://www.w3.org/TR/sparql11-query/#select) query is supported. One of the -simplest examples is `SELECT * WHERE { ?s ?p ?o }`. +- By default, the query is executed against the connected input, which must be a _SPARQL endpoint_ + (i.e. an RDF dataset). +- When **Use default RDF dataset** (`useDefaultDataset`) is enabled, the query is executed against the project's + default RDF dataset instead. If the template references input entity properties, the task accepts an entity + input and generates one query per entity; otherwise it needs no input at all. -The [result limit](https://www.w3.org/TR/sparql11-query/#modResultLimit) can be specified for the SPARQL SELECT plugin -itself, with the parameter `limit`. Additionally, a timeout can be specified with the parameter `sparqlTimeout`. +The _output_ is an entity table built from the query's +[SPARQL results](https://www.w3.org/TR/sparql11-results-json/#json-result-object): each projected variable becomes +a column, and each result binding becomes a row. -As usual, the SPARQL results contain both "variables" and "bindings", such as in -[this example](https://www.w3.org/TR/sparql11-results-json/#json-result-object). -This tabular raw form is transformed into an _entity table_. +The [result size](https://www.w3.org/TR/sparql11-query/#modResultLimit) can be capped with the `limit` parameter, +and a query timeout (in milliseconds) can be set via `sparqlTimeout`. 
-### Internal Specifics +## Automatic `FROM` clause injection If the SPARQL source is defined on a specific graph, a `FROM` clause will be added to the query at execution time, except when there already exists a `GRAPH` or `FROM` clause in the query. `FROM NAMED` clauses are not injected. + +## Templating + +The select query is rendered by a template engine before execution. +[`Jinja`](https://jinja.palletsprojects.com/) is the default and is described below; for the deprecated `Simple` +and `Velocity Engine` modes, see "Legacy template engines" at the end. + +Jinja uses `{{ ... }}` for value expressions and `{% ... %}` for control flow such as conditionals. + +### Template variables + +The following variables are available: + +- `input.config.`: a parameter of the connected input task. +- `output.config.`: a parameter of the connected output task. +- `input.entity.`: the value of the given property on the current input entity. Only populated when + the task is configured to receive input entities (see **Use default RDF dataset** above). +- `project.`: a project-scoped template variable. +- `global.`: a global template variable. + +Parameter, property and variable names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); +bracket-subscript access such as `input.entity["urn:prop:label"]` is not supported. + +For example, to query the named graph that is configured on the input dataset: + +```sparql +SELECT * WHERE { GRAPH <{{ input.config.graph | validate_uri }}> { ?s ?p ?o } } +``` + +### Default scope + +The `defaultScope` parameter declares one scope whose variables are additionally exposed at the top level of the +template context, so they can be referenced without the scope prefix. It defaults to `input.entity`, which means +a template may write `{{ property }}` as a shorthand for `{{ input.entity.property }}`: + +``` +{{ property }} ≡ {{ input.entity.property }} +``` + +Both forms resolve to the same value. 
Set `defaultScope` to the empty string to disable this aliasing and require +every variable to be addressed with its full scope. + +### Filters + +Values are inserted verbatim by default, so URI brackets (`<...>`) and quotation marks around literals must be +written in the template. The following filters are provided to render values safely: + +- `validate_uri`: validates that the value is a valid absolute IRI and returns it unchanged. Throws a validation + error otherwise. Wrap the output in `<...>` in the template. +- `escape_literal`: escapes backslashes, quotes, newlines, carriage returns and tabs so the value can be used + inside a short-form SPARQL string literal (`"..."` or `'...'`). No enclosing quotes are added. +- `escape_multiline_literal`: escapes backslashes and breaks any run of three or more consecutive single or double + quotes. Use for values that are wrapped in triple-quoted SPARQL literals (`"""..."""` or `'''...'''`). + +All transformer plugins are also available as Jinja filters under their plugin id (for example `lowerCase`, +`trim`, `urlEncode`). + +### Input schema inference + +The input schema (the entity properties the task expects) is derived by scanning the raw template for +`input.entity.` references (or bare references resolved via `defaultScope`). This scan operates +on the template text before rendering, so SPARQL line comments (`# ...`) are **not** stripped: a +commented-out line such as + +```sparql +# {{ input.entity.property }} +``` + +will still cause `property` to appear in the inferred input schema. + +### Output schema inference + +The output schema is derived from the raw template by a heuristic, without rendering it. The heuristic takes +the projection between `SELECT` and the first `WHERE`, `FROM` or `{`, drops a leading `DISTINCT` / `REDUCED`, +and then: + +- For `SELECT *`, collects every distinct `?var` token in the query. 
+- Otherwise, collects each top-level `?var` and the trailing `AS ?alias` from parenthesised expressions + (e.g. `(COUNT(?s) AS ?count)` yields `count`). + +Each variable becomes a string-typed path. If no variables can be detected (e.g. the projection is produced by +a Jinja expression), the output port is reported with an unknown schema. + +### Validation + +At task creation, the Jinja template is checked against the available template variables: + +- Every `project.<...>` or `global.<...>` reference must resolve to a known variable, matched on the full + scoped name (so e.g. `project.metaData.label` is looked up at that exact scope). +- Every `input.<...>` or `output.<...>` reference must use `config` or `entity` as its second segment. + +Bare references are resolved through `defaultScope` before applying the same rules. The template is not +rendered and the resulting SPARQL is not parsed. + +### Legacy template engines + +In addition to Jinja, two deprecated template engines are supported for backwards compatibility: `Simple` +and [`Velocity Engine`](https://velocity.apache.org/engine/2.4.1/user-guide.html). Their syntax is identical +to the one used by the `SPARQL Update operator` and is documented there. diff --git a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md index 3afdaac799..56ac93a01c 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md +++ b/silk-plugins/silk-plugins-rdf/src/main/resources/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.md @@ -11,13 +11,62 @@ _execute_ these queries, we need to connect this task from an input into an outp ## Templating -The SPARQL Update query plugin uses a template in order to construct and output SPARQL update queries. 
-There are two possible template engines supported by this plugin: a `Simple` engine and +The `sparqlUpdateOperator` plugin uses a **template** in order to construct and output SPARQL update queries. +Three template engines are supported: `Jinja` (the default), `Simple`, and [`Velocity Engine`](https://velocity.apache.org/engine/2.4.1/user-guide.html). -Each of these engines supports a different set of templating features, such as for example _variable interpolation_ with -the dollar sign (`$`), i.e. filling in input values via placeholders in the template. +The `Simple` and `Velocity Engine` modes are deprecated. -### Example of the `Simple` mode +### Example of the `Jinja` mode + +[Jinja](https://jinja.palletsprojects.com/) is the recommended template engine. It uses `{{ }}` for expressions and +`{% %}` for control flow statements such as conditionals. + +``` +DELETE DATA { <{{ input.entity.subject | validate_uri }}> rdfs:label "{{ input.entity.oldLabel | escape_literal }}" } ; +{% if input.entity.subject %} + INSERT DATA { <{{ input.entity.subject | validate_uri }}> rdfs:label "{{ input.entity.newLabel | escape_literal }}" } ; +{% endif %} +``` + +The following variables are available: + +- `input.entity.`: the value of the given property on the current input entity. +- `input.config.`: a parameter of the connected input task. +- `output.config.`: a parameter of the connected output task. +- `project.`: a project-scoped template variable. +- `global.`: a global template variable. + +Entity property names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`); bracket-subscript access such as +`input.entity["urn:prop:label"]` is not supported. + +Values are inserted verbatim by default, so URI brackets (`<...>`) and quotation marks around literals must be +written in the template. The following filters are provided to render values safely: + +- `validate_uri`: validates that the value is a valid absolute IRI and returns it unchanged. 
Throws a validation + error otherwise. Wrap the output in `<...>` in the template. +- `escape_literal`: escapes backslashes, quotes, newlines, carriage returns and tabs so the value can be used + inside a short-form SPARQL string literal (`"..."` or `'...'`). No enclosing quotes are added. +- `escape_multiline_literal`: escapes backslashes and breaks any run of three or more consecutive single or double + quotes. Use for values that are wrapped in triple-quoted SPARQL literals (`"""..."""` or `'''...'''`). + +All transformer plugins are also available as Jinja filters under their plugin id (for example `lowerCase`, +`trim`, `urlEncode`). + +### Validation + +At task creation, the template is checked against the available template variables. What is checked depends +on the selected templating mode: + +- `Jinja`: + - Every `project.<...>` or `global.<...>` reference must resolve to a known variable, matched on the full + scoped name (so e.g. `project.metaData.label` is looked up at that exact scope). + - Every `input.<...>` or `output.<...>` reference must use `config` or `entity` as its second segment. + - The template is not rendered and the resulting SPARQL is not parsed. +- `Simple` / `Velocity Engine`: + - The template is rendered once with placeholder values and the result must parse as a SPARQL Update query. + - Templates that use `rawUnsafe` skip this parse check. + +### Example of the `Simple` mode (deprecated) ``` DELETE DATA { ${} rdf:label ${"PROP_FROM_ENTITY_SCHEMA2"} } @@ -32,7 +81,7 @@ Furthermore, it will insert a plain literal serialization for the property value It is also possible to write something like `${"PROP"}^^` or `${"PROP"}@en`. In other words, we can combine variable substitutions with fixed expressions to construct semi-flexible expressions within the template. 
-### Example of the `Velocity Engine` mode +### Example of the `Velocity Engine` mode (deprecated) ``` DELETE DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/DefaultRdfDataset.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/DefaultRdfDataset.scala new file mode 100644 index 0000000000..2288cc400f --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/DefaultRdfDataset.scala @@ -0,0 +1,28 @@ +package org.silkframework.plugins.dataset.rdf + +import org.silkframework.dataset.Dataset +import org.silkframework.dataset.rdf.RdfDataset +import org.silkframework.runtime.plugin.{PluginContext, PluginRegistry} + +/** + * Resolves the RDF dataset configured under `dataset.defaultRdf`. + * + * Used by SPARQL query tasks that want to submit their query directly to a configured dataset + * rather than to an RDF dataset wired up via an input port. 
+ */ +object DefaultRdfDataset { + + private val configKey = "dataset.defaultRdf" + + def resolve()(implicit pluginContext: PluginContext): RdfDataset = { + PluginRegistry.createFromConfigOption[Dataset](configKey) match { + case Some(rdf: RdfDataset) => + rdf + case Some(other) => + throw new IllegalStateException( + s"Plugin configured at '$configKey' is not an RdfDataset: ${other.getClass.getSimpleName}") + case None => + throw new IllegalStateException(s"No default RDF dataset configured at '$configKey'.") + } + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala index 9342e83304..b2da803653 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/RdfPlugins.scala @@ -3,6 +3,7 @@ package org.silkframework.plugins.dataset.rdf import org.silkframework.plugins.dataset.rdf.datasets.{AlignmentDataset, InMemoryDataset, RdfFileDataset, SparqlDataset} import org.silkframework.plugins.dataset.rdf.executors.{LocalSparqlCopyExecutor, LocalSparqlSelectExecutor, LocalSparqlUpdateExecutor} import org.silkframework.plugins.dataset.rdf.tasks.{SparqlCopyCustomTask, SparqlSelectCustomTask, SparqlUpdateCustomTask} +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlSimpleTemplateEngine import org.silkframework.plugins.dataset.rdf.vocab.{InMemoryVocabularyManager, RdfFilesVocabularyManager, RdfProjectFilesVocabularyManager, RdfVocabularyManager} import org.silkframework.runtime.plugin.{AnyPlugin, PluginModule} @@ -20,7 +21,8 @@ class RdfPlugins extends PluginModule { classOf[InMemoryVocabularyManager], classOf[SparqlSelectCustomTask], classOf[SparqlCopyCustomTask], - classOf[SparqlUpdateCustomTask] + classOf[SparqlUpdateCustomTask], + 
classOf[SparqlSimpleTemplateEngine] ) ++ executors val executors = Seq( diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala index d05e9ff51d..a1b4b4872e 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutor.scala @@ -1,17 +1,21 @@ package org.silkframework.plugins.dataset.rdf.executors import org.silkframework.config.{Prefixes, Task, TaskSpec} -import org.silkframework.dataset.DataSource -import org.silkframework.dataset.rdf.{SparqlEndpoint, SparqlResults} +import org.silkframework.dataset.{DataSource, DatasetSpec} +import org.silkframework.dataset.rdf.{RdfDataset, RdfNode, SparqlEndpoint, SparqlResults} import org.silkframework.entity.Entity import org.silkframework.execution.local.{GenericEntityTable, LocalEntities, LocalExecution, LocalExecutor} import org.silkframework.execution.typed.SparqlEndpointEntitySchema -import org.silkframework.execution.{ExecutionReport, ExecutionReportUpdater, ExecutorOutput, TaskException} +import org.silkframework.execution.{ExecutionReport, ExecutionReportUpdater, ExecutorOutput, ReportingIterator, TaskException} +import org.silkframework.plugins.dataset.rdf.DefaultRdfDataset import org.silkframework.plugins.dataset.rdf.tasks.SparqlSelectCustomTask +import org.silkframework.plugins.dataset.rdf.tasks.templating.TaskProperties import org.silkframework.runtime.activity.{ActivityContext, UserContext} import org.silkframework.runtime.iterator.{AbstractRewindableEntityIterator, CloseableIterator} import org.silkframework.runtime.plugin.PluginContext +import scala.collection.immutable.SortedMap + /** * Local executor for 
[[SparqlSelectCustomTask]]. */ @@ -23,40 +27,97 @@ case class LocalSparqlSelectExecutor() extends LocalExecutor[SparqlSelectCustomT context: ActivityContext[ExecutionReport]) (implicit pluginContext: PluginContext): Option[LocalEntities] = { val taskData = task.data - implicit val user: UserContext = pluginContext.user + implicit val prefixes: Prefixes = pluginContext.prefixes + implicit val executionReportUpdater: SparqlSelectExecutionReportUpdater = SparqlSelectExecutionReportUpdater(task, context) inputs match { case Seq(SparqlEndpointEntitySchema(sparql)) => - implicit val executionReportUpdater: SparqlSelectExecutionReportUpdater = SparqlSelectExecutionReportUpdater(task, context) - val entities = new LocalSparqlSelectIterator(taskData, sparql.task.data.plugin.sparqlEndpoint, executionReportUpdater = Some(executionReportUpdater)) - Some(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task)) + val entities = executeOnSparqlEndpoint(taskData, sparql.task, output.task, executionReportUpdater = Some(executionReportUpdater)) + Some(ReportingIterator.addReporter(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task))) + case Seq() if taskData.useDefaultDataset => + val rdfDataset = DefaultRdfDataset.resolve() + val entities = executeOnDefaultDataset(taskData, rdfDataset, output.task, executionReportUpdater = Some(executionReportUpdater)) + Some(ReportingIterator.addReporter(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task))) + case Seq(input) if taskData.useDefaultDataset => + val rdfDataset = DefaultRdfDataset.resolve() + val entities = executeOnDefaultDatasetPerEntity(taskData, rdfDataset, input, output.task, executionReportUpdater) + Some(ReportingIterator.addReporter(GenericEntityTable(entities, entitySchema = taskData.outputSchema, task))) case _ => throw TaskException("SPARQL select executor did not receive a SPARQL endpoint as requested!") } } + + def executeOnSparqlEndpoint(sparqlSelectTask: 
SparqlSelectCustomTask, + inputTask: Task[DatasetSpec[RdfDataset]], + outputTask: Option[Task[_ <: TaskSpec]], + limit: Int = Integer.MAX_VALUE, + executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]) + (implicit pluginContext: PluginContext): LocalSparqlSelectIterator = { + new LocalSparqlSelectIterator(sparqlSelectTask, inputTask.data.plugin.sparqlEndpoint, Some(inputTask), outputTask, limit, executionReportUpdater) + } + + private def executeOnDefaultDataset(sparqlSelectTask: SparqlSelectCustomTask, + rdfDataset: RdfDataset, + outputTask: Option[Task[_ <: TaskSpec]], + limit: Int = Integer.MAX_VALUE, + executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]) + (implicit pluginContext: PluginContext): LocalSparqlSelectIterator = { + new LocalSparqlSelectIterator(sparqlSelectTask, rdfDataset.sparqlEndpoint, None, outputTask, limit, executionReportUpdater) + } + + def executeOnDefaultDatasetPerEntity(sparqlSelectTask: SparqlSelectCustomTask, + rdfDataset: RdfDataset, + input: LocalEntities, + outputTask: Option[Task[_ <: TaskSpec]], + executionReportUpdater: SparqlSelectExecutionReportUpdater, + limit: Int = Integer.MAX_VALUE) + (implicit pluginContext: PluginContext): CloseableIterator[Entity] = { + implicit val user: UserContext = pluginContext.user + val sparqlEndpoint = rdfDataset.sparqlEndpoint + val selectLimit = math.min(sparqlSelectTask.intLimit.getOrElse(Integer.MAX_VALUE), limit) + val taskProperties = TaskProperties.create(Some(input.task), outputTask, pluginContext) + val templateVariables = pluginContext.templateVariables.all.variables + val expectedSchema = sparqlSelectTask.expectedInputSchema + val vars = LocalSparqlSelectIterator.getSparqlVars(sparqlSelectTask) + + val bindings = input.entities.flatMap { entity => + val values = expectedSchema.typedPaths.map(tp => entity.valueOfPath(tp.toUntypedPath)) + val projected = Entity(entity.uri, values, expectedSchema) + val queries = 
sparqlSelectTask.queryTemplate.generate(Some(projected), taskProperties, templateVariables) + queries.iterator.flatMap { query => + executionReportUpdater.increaseQueryCounter() + LocalSparqlSelectIterator.executeSelect(sparqlEndpoint, query, selectLimit, Some(sparqlSelectTask.sparqlTimeout)).bindings + } + } + LocalSparqlSelectIterator.createEntities(sparqlSelectTask, bindings, vars) + } } /** - * Iterator that executes a SPARQL SELECT query on a SPARQL endpoint and returns the results as entities. + * Rewindable iterator that executes a SPARQL SELECT query and returns the results as entities. */ class LocalSparqlSelectIterator(sparqlSelectTask: SparqlSelectCustomTask, - sparql: SparqlEndpoint, + sparqlEndpoint: SparqlEndpoint, + inputTask: Option[Task[_ <: TaskSpec]], + outputTask: Option[Task[_ <: TaskSpec]], limit: Int = Integer.MAX_VALUE, executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]) - (implicit userContext: UserContext) extends AbstractRewindableEntityIterator { + (implicit pluginContext: PluginContext) extends AbstractRewindableEntityIterator { + private implicit val user: UserContext = pluginContext.user override def newIterator(): CloseableIterator[Entity] = { val selectLimit = math.min(sparqlSelectTask.intLimit.getOrElse(Integer.MAX_VALUE), limit) - val results = select(sparqlSelectTask, sparql, selectLimit) - val vars: IndexedSeq[String] = getSparqlVars(sparqlSelectTask) - createEntities(sparqlSelectTask, results, vars, executionReportUpdater) - } - - private def select(sparqlSelectTask: SparqlSelectCustomTask, sparql: SparqlEndpoint, selectLimit: Int) - (implicit userContext: UserContext): SparqlResults = { - executeSelect(sparql, sparqlSelectTask.selectQuery.str, selectLimit, Some(sparqlSelectTask.sparqlTimeout)) + val taskProperties = TaskProperties.create(inputTask, outputTask, pluginContext) + val templateVariables = pluginContext.templateVariables.all.variables + val query = sparqlSelectTask.queryTemplate.generate(None, 
taskProperties, templateVariables).head + executionReportUpdater.foreach(_.increaseQueryCounter()) + val results = LocalSparqlSelectIterator.executeSelect(sparqlEndpoint, query, selectLimit, Some(sparqlSelectTask.sparqlTimeout)) + val vars = LocalSparqlSelectIterator.getSparqlVars(sparqlSelectTask) + LocalSparqlSelectIterator.createEntities(sparqlSelectTask, results.bindings, vars) } +} +object LocalSparqlSelectIterator { /** * Executes the select query on the SPARQL endpoint. * @@ -65,11 +126,11 @@ class LocalSparqlSelectIterator(sparqlSelectTask: SparqlSelectCustomTask, * @param timeout An optional timeout in ms for the query execution. If defined it should have an positive value, else it will be ignored. * This timeout is passed to the underlying SPARQL endpoint implementation. */ - private def executeSelect(sparqlEndpoint: SparqlEndpoint, - query: String, - limit: Int = Integer.MAX_VALUE, - timeout: Option[Int] = None) - (implicit userContext: UserContext): SparqlResults = { + def executeSelect(sparqlEndpoint: SparqlEndpoint, + query: String, + limit: Int = Integer.MAX_VALUE, + timeout: Option[Int] = None) + (implicit userContext: UserContext): SparqlResults = { timeout match { case Some(timeoutInMs) if timeoutInMs > 0 => val updatedParams = sparqlEndpoint.sparqlParams.copy(timeout = timeout) @@ -79,8 +140,8 @@ class LocalSparqlSelectIterator(sparqlSelectTask: SparqlSelectCustomTask, } } - private def getSparqlVars(taskData: SparqlSelectCustomTask): IndexedSeq[String] = { - val vars = taskData.outputSchema.typedPaths map { v => + def getSparqlVars(taskData: SparqlSelectCustomTask): IndexedSeq[String] = { + taskData.outputSchema.typedPaths map { v => v.propertyUri match { case Some(prop) => prop.uri @@ -88,48 +149,39 @@ class LocalSparqlSelectIterator(sparqlSelectTask: SparqlSelectCustomTask, throw TaskException("Path in input schema of SPARQL select operator is not a simple forward property: " + v.toUntypedPath.normalizedSerialization) } } - vars } - private 
def createEntities(taskData: SparqlSelectCustomTask, - results: SparqlResults, - vars: IndexedSeq[String], - executionReportUpdater: Option[SparqlSelectExecutionReportUpdater]): CloseableIterator[Entity] = { - implicit val prefixes: Prefixes = Prefixes.empty - var schemaReported = false - val increase: (Entity => Unit) = (entity: Entity) => executionReportUpdater match { - case Some(updater) => - if (!schemaReported) { - schemaReported = true - updater.startNewOutputSamples(entity.schema) - } - updater.addEntityAsSampleEntity(entity) - updater.increaseEntityCounter() - case None => // no-op - } - + def createEntities(taskData: SparqlSelectCustomTask, + bindings: CloseableIterator[SortedMap[String, RdfNode]], + vars: IndexedSeq[String]): CloseableIterator[Entity] = { var count = 0 - val entityIterator = results.bindings.map { binding => + bindings.map { binding => count += 1 - val values = vars map { v => - binding.get(v).toSeq.map(_.value) - } - val entity = Entity(DataSource.URN_NID_PREFIX + count, values = values, schema = taskData.outputSchema) - increase(entity) - entity + val values = vars.map(v => binding.get(v).toSeq.map(_.value)) + Entity(DataSource.URN_NID_PREFIX + count, values = values, schema = taskData.outputSchema) } - entityIterator.thenClose(() => executionReportUpdater.foreach(updater => updater.executionDone())) } } case class SparqlSelectExecutionReportUpdater(task: Task[TaskSpec], context: ActivityContext[ExecutionReport]) extends ExecutionReportUpdater { - override def operationLabel: Option[String] = Some("generate queries") + private var queriesStarted = 0 + + def increaseQueryCounter(): Unit = { + queriesStarted += 1 + } override def entityLabelSingle: String = "Row" override def entityLabelPlural: String = "Rows" - override def minEntitiesBetweenUpdates: Int = 1 + override def entityProcessVerb: String = { + val queryWord = if (queriesStarted == 1) "query" else "queries" + s"processed ($queriesStarted $queryWord)" + } + + override def 
additionalFields(): Seq[(String, String)] = { + Seq("Queries" -> queriesStarted.toString).filter(_ => queriesStarted > 0) + } } \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala index 20a0d5c84d..01a579b4bd 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlUpdateExecutor.scala @@ -28,16 +28,17 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT val updateTask = task.data val expectedSchema = updateTask.expectedInputSchema + val templateVariables = pluginContext.templateVariables.all.variables + // Generate SPARQL Update queries for input entities def executeOnInput[U](batchEmitter: BatchSparqlUpdateEmitter[U], expectedProperties: IndexedSeq[String], input: LocalEntities): Unit = { val inputProperties = getInputProperties(input.entitySchema).distinct - val taskProperties = createTaskProperties(Some(input.task), output.task, pluginContext) + val taskProperties = TaskProperties.create(Some(input.task), output.task, pluginContext) checkInputSchema(expectedProperties, inputProperties.toSet) - for (entity <- input.entities; - values = expectedSchema.typedPaths.map(tp => entity.valueOfPath(tp.toUntypedPath)) if values.forall(_.nonEmpty)) { - val it = CrossProductIterator(values, expectedProperties) - while (it.hasNext) { - val query = updateTask.generate(it.next(), taskProperties) + for (entity <- input.entities) { + val values = expectedSchema.typedPaths.map(tp => entity.valueOfPath(tp.toUntypedPath)) + val projected = Entity(entity.uri, values, expectedSchema) + for (query <- 
updateTask.compiledTemplate.generate(Some(projected), taskProperties, templateVariables)) { batchEmitter.update(query) } } @@ -74,19 +75,9 @@ case class LocalSparqlUpdateExecutor() extends LocalExecutor[SparqlUpdateCustomT inputTask: Option[Task[_ <: TaskSpec]] = None, outputTask: Option[Task[_ <: TaskSpec]] = None) (implicit pluginContext: PluginContext): Unit = { - val taskProperties = createTaskProperties(inputTask = inputTask, outputTask = outputTask, pluginContext = pluginContext) - val query = updateTask.generate(Map.empty, taskProperties) - batchEmitter.update(query) - } - - private def createTaskProperties(inputTask: Option[Task[_ <: TaskSpec]], - outputTask: Option[Task[_ <: TaskSpec]], - pluginContext: PluginContext): TaskProperties = { - // It's obligatory to have empty prefixes here, since we do not want to have prefixed URIs for URI parameters - implicit val updatedPluginContext: PluginContext = PluginContext.updatedPluginContext(pluginContext, prefixes = Some(Prefixes.empty)) - val inputProperties = inputTask.toSeq.flatMap(_.parameters.toStringMap).toMap - val outputProperties = outputTask.toSeq.flatMap(_.parameters.toStringMap).toMap - TaskProperties(inputProperties, outputProperties) + val taskProperties = TaskProperties.create(inputTask = inputTask, outputTask = outputTask, pluginContext = pluginContext) + val templateVariables = pluginContext.templateVariables.all.variables + updateTask.compiledTemplate.generate(None, taskProperties, templateVariables).foreach(batchEmitter.update) } // Check that expected schema is subset of input schema @@ -117,49 +108,6 @@ case class SparqlUpdateExecutionReportUpdater(task: Task[TaskSpec], override def minEntitiesBetweenUpdates: Int = 1 } -case class CrossProductIterator(values: IndexedSeq[Seq[String]], - properties: IndexedSeq[String]) extends Iterator[Map[String, String]] { - assert(values.nonEmpty) - private val sizes = values.map(_.size).toArray - // Holds the current index combination - private val indexes 
= new Array[Int](values.size) - private val firstNonEmptyIdx = sizes.zipWithIndex.filter(_._1 > 0).map(_._2).headOption.getOrElse(-1) // -1 if all are empty - private val lastIndex = values.size - 1 - private var first: Boolean = true // This makes sure that at least one assignment is always generated - - override def hasNext: Boolean = first || firstNonEmptyIdx > -1 && (indexes(firstNonEmptyIdx) < sizes(firstNonEmptyIdx)) - - override def next(): Map[String, String] = { - if(!hasNext) { - throw new IllegalStateException("Iterator is fully consumed and has no more values!") - } - val nextAssignment = indexes.zipWithIndex.collect { - case (valueIdx, propertyIndex) if sizes(propertyIndex) > 0 => properties(propertyIndex) -> values(propertyIndex)(valueIdx) - }.toMap - setNextIndexCombinations() - first = false - nextAssignment - } - - private def setNextIndexCombinations(): Unit = { - var idx = lastIndex - while(idx > -1) { - indexes(idx) += 1 - if(indexes(idx) >= sizes(idx) && idx != firstNonEmptyIdx) { // Do not reset the first index, because of hasNext check - indexes(idx) = 0 - idx -= 1 - } else if(idx > 0) { - for(i <- (idx + 1) to lastIndex) { // null all index values after this index - indexes(i) = 0 - } - idx = -1 - } else { - idx = -1 - } - } - } -} - case class BatchSparqlUpdateEmitter[U](f: String => U, batchSize: Int, reportUpdater: SparqlUpdateExecutionReportUpdater) { private var sparqlUpdateQueries = new StringBuffer() private var queryCount = 0 diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlEndpointDatasetAutoCompletionProvider.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlEndpointDatasetAutoCompletionProvider.scala index 735fc48964..827088fd2a 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlEndpointDatasetAutoCompletionProvider.scala +++ 
b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlEndpointDatasetAutoCompletionProvider.scala @@ -24,7 +24,12 @@ case class SparqlEndpointDatasetAutoCompletionProvider() extends PluginParameter override def valueToLabel(value: String, dependOnParameterValues: Seq[ParamValue], workspace: WorkspaceReadTrait) (implicit context: PluginContext): Option[String] = { implicit val userContext: UserContext = context.user - val projectId = context.projectId.getOrElse(throw new ValidationException("Project not provided")) - workspace.project(projectId).taskOption[GenericDatasetSpec](value).flatMap(_.metaData.label) + if(value == "") { + // No endpoint selected + None + } else { + val projectId = context.projectId.getOrElse(throw new ValidationException("Project not provided")) + workspace.project(projectId).taskOption[GenericDatasetSpec](value).flatMap(_.metaData.label) + } } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala index 9e643a9fc8..c924d81e80 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlSelectCustomTask.scala @@ -1,18 +1,16 @@ package org.silkframework.plugins.dataset.rdf.tasks -import org.apache.jena.query.QueryFactory -import org.silkframework.config.{CustomTask, FixedNumberOfInputs, FixedSchemaPort, InputPorts, Port} +import org.silkframework.config._ import org.silkframework.dataset.rdf.SparqlEndpointDatasetParameter import org.silkframework.entity._ -import org.silkframework.entity.paths.{TypedPath, UntypedPath} import org.silkframework.execution.typed.SparqlEndpointEntitySchema import 
org.silkframework.plugins.dataset.rdf.datasets.SparqlDataset -import org.silkframework.runtime.plugin.annotations.{Param, Plugin, PluginReference} +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlTemplate +import org.silkframework.runtime.plugin.PluginContext +import org.silkframework.runtime.plugin.annotations.{Action, Param, Plugin, PluginReference} import org.silkframework.runtime.plugin.types.SparqlCodeParameter -import org.silkframework.runtime.validation.ValidationException -import org.silkframework.util.Uri +import org.silkframework.runtime.templating.TemplateEngineAutocompletionProvider -import scala.jdk.CollectionConverters.ListHasAsScala import scala.util.Try /** @@ -22,10 +20,7 @@ import scala.util.Try @Plugin( id = SparqlSelectCustomTask.pluginId, label = "SPARQL Select query", - description = - "A task that executes a SPARQL Select query on a SPARQL enabled data source and outputs the SPARQL result." + - " If the SPARQL source is defined on a specific graph, a FROM clause will be added to the query at execution time," + - " except when there already exists a GRAPH or FROM clause in the query. FROM NAMED clauses are not injected.", + description = "A task that executes a SPARQL Select query and outputs the SPARQL result.", documentationFile = "SparqlSelectCustomTask.md", iconFile = "sparql-select-query.svg", relatedPlugins = Array( @@ -40,7 +35,13 @@ import scala.util.Try ) ) case class SparqlSelectCustomTask( - @Param(label = "Select query", value = "A SPARQL 1.1 select query", example = "select * where { ?s ?p ?o }") + @Param( + label = "Select query", + value = "A SPARQL 1.1 select query. The query supports Jinja templating. " + + "Parameters of the connected input and output tasks can be accessed via 'input.config.' and 'output.config.'. " + + "Project and global template variables are available as 'project.' and 'global.'. 
" + + "Example: SELECT * WHERE { GRAPH <{{ input.config.graph }}> { ?s ?p ?o } }", + example = "select * where { ?s ?p ?o }") selectQuery: SparqlCodeParameter, @Param(label = "Result limit", value = "If set to a positive integer, the number of results is limited") limit: String = "", @@ -50,38 +51,82 @@ case class SparqlSelectCustomTask( autoCompletionProvider = classOf[SparqlEndpointDatasetAutoCompletionProvider], autoCompleteValueWithLabels = true, allowOnlyAutoCompletedValues = true) optionalInputDataset: SparqlEndpointDatasetParameter = SparqlEndpointDatasetParameter(""), + @Param( + label = "Use default RDF dataset", + value = "If enabled, the SELECT query is submitted directly to the configured default RDF dataset." + + " If the query template references input entities, one query is generated per input entity." + ) + useDefaultDataset: Boolean = false, + @Param( + value = "The templating mode for the template engine.", + autoCompletionProvider = classOf[TemplateEngineAutocompletionProvider], + autoCompleteValueWithLabels = true + ) + templatingMode: String = "jinja", + @Param( + label = "Default scope", + value = "Variables from this scope can be accessed without the scope prefix in Jinja. " + + "For example, with default scope 'input.entity', a template may reference '{{ property }}' instead of '{{ input.entity.property }}'. " + + "Leave empty to disable." + ) + defaultScope: String = "input.entity", @Param( label = "SPARQL query timeout (ms)", value = "SPARQL query timeout (select/update) in milliseconds. A value of zero means that there is no timeout set explicitly." + - " If a value greater zero is specified this overwrites possible default timeouts." 
+ " If a value greater zero is specified this overwrites possible default timeouts.", + advanced = true ) - sparqlTimeout: Int = 0 + sparqlTimeout: Int = 0, ) extends CustomTask { val intLimit: Option[Int] = { // Only allow positive ints Try(limit.toInt).filter(_ > 0).toOption } + private val defaultScopePath: Seq[String] = defaultScope.split('.').map(_.trim).filter(_.nonEmpty).toSeq + + val queryTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, selectQuery.str, defaultScopePath) + for(variables <- selectQuery.variables) { + queryTemplate.validate(variables, None) + } + + def isStaticTemplate: Boolean = queryTemplate.isStaticTemplate + + def expectedInputSchema: EntitySchema = queryTemplate.inputSchema + override def inputPorts: InputPorts = { - FixedNumberOfInputs(Seq(FixedSchemaPort(SparqlEndpointEntitySchema.schema))) + if (useDefaultDataset) { + if (isStaticTemplate) { + InputPorts.NoInputPorts + } else { + FixedNumberOfInputs(Seq(FixedSchemaPort(expectedInputSchema))) + } + } else { + FixedNumberOfInputs(Seq(FixedSchemaPort(SparqlEndpointEntitySchema.schema))) + } } override def outputPort: Option[Port] = { - Some(FixedSchemaPort(outputSchema)) + if (outputSchema.typedPaths.isEmpty) { + Some(UnknownSchemaPort) + } else { + Some(FixedSchemaPort(outputSchema)) + } } - val outputSchema: EntitySchema = { - val query = QueryFactory.create(selectQuery.str) - if (!query.isSelectType) { - throw new ValidationException("Query is not a SELECT query!") - } - val typedPaths = query.getResultVars.asScala map { v => - TypedPath(UntypedPath(v), ValueType.STRING, isAttribute = false) + val outputSchema: EntitySchema = queryTemplate.outputSchema + + @Action( + label = "Show prefixes", + description = "Shows the available namespace prefixes as a SPARQL header that can be copied into the query." 
+ ) + def showPrefixes(implicit pluginContext: PluginContext): String = { + val prefixes = pluginContext.prefixes + if (prefixes.prefixMap.isEmpty) { + "No prefixes are defined." + } else { + "```sparql\n" + prefixes.toSparql + "\n```" } - EntitySchema( - typeUri = Uri(""), - typedPaths = typedPaths.toIndexedSeq - ) } } diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala index 0c4ce68241..323f855eec 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/SparqlUpdateCustomTask.scala @@ -5,9 +5,11 @@ import org.silkframework.entity._ import org.silkframework.execution.typed.SparqlUpdateEntitySchema import org.silkframework.plugins.dataset.rdf.tasks.templating._ import org.silkframework.plugins.dataset.rdf.datasets.SparqlDataset -import org.silkframework.runtime.plugin.annotations.{Param, Plugin, PluginReference} +import org.silkframework.runtime.plugin.PluginContext +import org.silkframework.runtime.plugin.annotations.{Action, Param, Plugin, PluginReference} import org.silkframework.runtime.plugin.types.SparqlCodeParameter +import org.silkframework.runtime.templating.{TemplateEngineAutocompletionProvider, TemplateEngines} @Plugin( id = SparqlUpdateCustomTask.pluginId, @@ -32,40 +34,30 @@ case class SparqlUpdateCustomTask( @Param( label = "SPARQL update query", value = "The SPARQL UPDATE template for constructing SPARQL UPDATE queries for every entity from the input." + - " The possible values for the template engine are `Simple` and `Velocity Engine`." + + " The possible values for the template engine are `Jinja` (default), `Simple` and `Velocity Engine`." 
+ " See the general documentation of this plugin for further details on the features of each template engine.", - example = "DELETE DATA { ${} rdf:label ${\"PROP_FROM_ENTITY_SCHEMA2\"} }" + example = "INSERT DATA { <{{ input.entity.subject }}> rdfs:label \"{{ input.entity.label }}\" } ;" ) sparqlUpdateTemplate: SparqlCodeParameter, @Param(label = "Batch size", value = "How many entities should be handled in a single update request.") batchSize: Int = SparqlUpdateCustomTask.defaultBatchSize, @Param( - "The templating mode for the template engine. The possible values are `Simple` and `Velocity Engine`." + - " See the general documentation of this plugin for further details on the features of each template engine.", + value = "The templating mode for the template engine. See the general documentation of this plugin for further details on the features of each template engine.", + autoCompletionProvider = classOf[TemplateEngineAutocompletionProvider], + autoCompleteValueWithLabels = true ) - templatingMode: SparqlUpdateTemplatingMode = SparqlUpdateTemplatingMode.simple + templatingMode: String = "jinja" ) extends CustomTask { assert(batchSize >= 1, "Batch size must be greater zero!") - val templatingEngine: SparqlUpdateTemplatingEngine = templatingMode match { - case SparqlUpdateTemplatingMode.simple => SparqlUpdateTemplatingEngineSimple(sparqlUpdateTemplate.str, batchSize) - case SparqlUpdateTemplatingMode.velocity => SparqlTemplatingEngineVelocity(sparqlUpdateTemplate.str, batchSize) + val compiledTemplate: SparqlTemplate = SparqlTemplate.create(templatingMode, sparqlUpdateTemplate.str) + for(variables <- sparqlUpdateTemplate.variables) { + compiledTemplate.validate(variables, None) } - templatingEngine.validate() - - def isStaticTemplate: Boolean = templatingEngine.isStaticTemplate + def isStaticTemplate: Boolean = compiledTemplate.isStaticTemplate - def expectedInputSchema: EntitySchema = templatingEngine.inputSchema - - /** - * Generates The SPARQL Update query 
based on the placeholder assignments. - * @param placeholderAssignments For each placeholder in the query template - * @return - */ - def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { - templatingEngine.generate(placeholderAssignments, taskProperties) - } + def expectedInputSchema: EntitySchema = compiledTemplate.inputSchema override def inputPorts: InputPorts = { if(isStaticTemplate) { @@ -76,9 +68,22 @@ case class SparqlUpdateCustomTask( } override def outputPort: Option[Port] = Some(FixedSchemaPort(SparqlUpdateEntitySchema.schema)) + + @Action( + label = "Show prefixes", + description = "Shows the available namespace prefixes as a SPARQL header that can be copied into the query." + ) + def showPrefixes(implicit pluginContext: PluginContext): String = { + val prefixes = pluginContext.prefixes + if (prefixes.prefixMap.isEmpty) { + "No prefixes are defined." + } else { + "```sparql\n" + prefixes.toSparql + "\n```" + } + } } object SparqlUpdateCustomTask { final val defaultBatchSize = 1 final val pluginId = "sparqlUpdateOperator" -} \ No newline at end of file +} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala new file mode 100644 index 0000000000..bea941c521 --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplate.scala @@ -0,0 +1,140 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.silkframework.entity.paths.{TypedPath, UntypedPath} +import org.silkframework.entity.{Entity, EntitySchema, ValueType} +import org.silkframework.execution.local.EmptyEntityTable +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateEngines, TemplateVariableConversions, TemplateVariableName, 
TemplateVariableValue, TemplateVariablesReader} +import org.silkframework.runtime.validation.ValidationException +import org.silkframework.util.Uri + +import java.io.StringWriter + +/** + * SPARQL template implementation for the Jinja engine. + * + * Exposes the following variables: + * + * {{ input.config. }} -- parameter of the connected input task + * {{ input.entity. }} -- value of the current input entity + * {{ output.config. }} -- parameter of the connected output task + * {{ project. }} -- project-scoped template variable + * {{ global. }} -- global template variable + * + * Entity property names must be valid Jinja identifiers (`[a-zA-Z_][a-zA-Z0-9_]*`) + * + * @param rawTemplate The raw template source. Compiled internally via the Jinja engine and also used + * to derive the output schema heuristically without rendering. + * @param defaultScope If non-empty, every variable at this scope is also exposed at the top level of the + * Jinja context, so the template may reference it without the scope prefix. For example, + * with `defaultScope = Seq("input", "entity")`, a template may use `{{ property }}` in + * place of `{{ input.entity.property }}`. 
+ */ +class SparqlJinjaTemplate(rawTemplate: String, defaultScope: Seq[String] = Seq.empty) extends SparqlTemplate { + + import SparqlJinjaTemplate._ + + private val template: CompiledTemplate = TemplateEngines.create(JINJA_ENGINE_ID).compile(rawTemplate) + + override def generate(entity: Option[Entity], + taskProperties: TaskProperties, + templateVariables: Seq[TemplateVariableValue] = Seq.empty): Iterable[String] = { + val values = buildValues(entity, taskProperties, templateVariables) + val writer = new StringWriter() + template.evaluate(values, writer) + Seq(writer.toString) + } + + override def validate(variables: TemplateVariablesReader, batchSize: Option[Int]): Unit = { + val available = variables.all.variables.map(v => (v.name, v.scope)).toSet + for (variable <- referencedVariables.distinct) { + val effectiveScope = if (variable.scope.isEmpty) defaultScope else variable.scope + validateReference(variable, effectiveScope, available) + } + } + + private def validateReference(variable: TemplateVariableName, + effectiveScope: Seq[String], + available: Set[(String, Seq[String])]): Unit = { + effectiveScope.headOption match { + case Some(top) if VARIABLE_SCOPES.contains(top) => + if (!available.contains((variable.name, effectiveScope))) { + throw new ValidationException(s"Unknown template variable '${variable.scopedName}'.") + } + case Some(top) if TASK_SCOPES.contains(top) => + val subSection = effectiveScope.lift(1).getOrElse("") + if (!TASK_SUB_SECTIONS.contains(subSection)) { + throw new ValidationException( + s"Invalid template variable '${variable.scopedName}'. " + + s"Only '$top.config.' and '$top.entity.' 
are valid.") + } + case _ => + } + } + + override def inputSchema: EntitySchema = { + val properties = entityPropertyNames + if (properties.isEmpty) { + EmptyEntityTable.schema + } else { + EntitySchema("", properties.map(p => UntypedPath(p).asUntypedValueType).toIndexedSeq) + } + } + + override lazy val outputSchema: EntitySchema = { + val vars = SparqlSelectVarExtractor.extractSelectVars(rawTemplate) + val paths = vars.map(v => TypedPath(UntypedPath(v), ValueType.STRING, isAttribute = false)) + EntitySchema(typeUri = Uri(""), typedPaths = paths.toIndexedSeq) + } + + override def isStaticTemplate: Boolean = { + entityPropertyNames.isEmpty + } + + private def buildValues(entity: Option[Entity], + taskProperties: TaskProperties, + templateVariables: Seq[TemplateVariableValue]): Seq[TemplateVariableValue] = { + val inputConfig = taskProperties.inputTask.map { case (k, v) => + new TemplateVariableValue(k, INPUT_CONFIG_SCOPE, Seq(v)) + } + val inputEntity = entity.toSeq.flatMap(e => TemplateVariableConversions.fromEntity(e, INPUT_ENTITY_SCOPE)) + val outputConfig = taskProperties.outputTask.map { case (k, v) => + new TemplateVariableValue(k, OUTPUT_CONFIG_SCOPE, Seq(v)) + } + val scoped = (inputConfig ++ inputEntity ++ outputConfig).toSeq ++ templateVariables + val aliased = + if (defaultScope.nonEmpty) { + scoped.filter(_.scope == defaultScope).map(v => new TemplateVariableValue(v.name, Seq.empty, v.values)) + } else { + Seq.empty + } + scoped ++ aliased + } + + private def referencedVariables: Seq[TemplateVariableName] = { + template.variables.getOrElse(Seq.empty) + } + + private def entityPropertyNames: Seq[String] = { + val scoped = referencedVariables.filter(_.scope == INPUT_ENTITY_SCOPE).map(_.name) + val aliased = + if (defaultScope == INPUT_ENTITY_SCOPE) { + referencedVariables.filter(_.scope.isEmpty).map(_.name) + } else { + Seq.empty + } + (scoped ++ aliased).distinct + } +} + +object SparqlJinjaTemplate { + + private[templating] final val JINJA_ENGINE_ID = 
"jinja" + + private[templating] final val INPUT_CONFIG_SCOPE: Seq[String] = Seq("input", "config") + private[templating] final val INPUT_ENTITY_SCOPE: Seq[String] = Seq("input", "entity") + private[templating] final val OUTPUT_CONFIG_SCOPE: Seq[String] = Seq("output", "config") + + private final val VARIABLE_SCOPES: Set[String] = Set("project", "global") + private final val TASK_SCOPES: Set[String] = Set("input", "output") + private final val TASK_SUB_SECTIONS: Set[String] = Set("config", "entity") +} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala new file mode 100644 index 0000000000..0adaaf906a --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlLegacyTemplate.scala @@ -0,0 +1,240 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.apache.jena.query.QueryFactory +import org.silkframework.entity.paths.{TypedPath, UntypedPath} +import org.silkframework.entity.{Entity, EntitySchema, ValueType} +import org.silkframework.execution.local.EmptyEntityTable +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlLegacyTemplate._ +import org.silkframework.runtime.templating.{CompiledTemplate, TemplateMethodUsage, TemplateVariableName, TemplateVariableValue, TemplateVariablesReader} +import org.silkframework.runtime.validation.ValidationException +import org.silkframework.util.Uri + +import java.io.StringWriter +import scala.jdk.CollectionConverters.ListHasAsScala +import scala.util.{Failure, Success, Try} + +/** + * SPARQL template implementation for the Velocity and Simple template engines. 
+ * + * Exposes input entity values via a `row` object, and the connected input/output task parameters via + * `inputProperties` / `outputProperties` objects. All three objects offer the methods defined on + * [[TemplateValueAccessApi]] (`uri`, `plainLiteral`, `rawUnsafe`, `exists`). + */ +class SparqlLegacyTemplate(template: CompiledTemplate) extends SparqlTemplate { + + override def generate(entity: Option[Entity], + taskProperties: TaskProperties, + templateVariables: Seq[TemplateVariableValue] = Seq.empty): Iterable[String] = { + entity match { + case Some(e) if e.values.nonEmpty => + val properties = e.schema.typedPaths.map(_.normalizedSerialization) + CrossProductIterator(e.values, properties).map(renderOnce(_, taskProperties)).toSeq + case _ => + Seq(renderOnce(Map.empty, taskProperties)) + } + } + + private def renderOnce(placeholderAssignments: Map[String, String], + taskProperties: TaskProperties): String = { + val values = scala.collection.mutable.LinkedHashMap[String, AnyRef]() + // Flat entity values (used by simple template engine) + placeholderAssignments.foreach { case (k, v) => values(k) = v } + // SPARQL context objects + values(ROW_VAR_NAME) = Row(placeholderAssignments) + values(INPUT_PROPERTIES_VAR_NAME) = InputProperties(taskProperties.inputTask) + values(OUTPUT_PROPERTIES_VAR_NAME) = OutputProperties(taskProperties.outputTask) + val writer = new StringWriter() + template.evaluate(values.toMap, writer) + writer.toString + } + + /** Renders the template with example values for every variable. Used to derive schemas and validate queries. 
*/ + private def generateWithDefaults(): String = { + val genericUri = "urn:generic:1" + val entityVariables = entityVariableNames + val assignments = entityVariables.map(_ -> genericUri).toMap + val inputPropVars = taskPropertyVariableNames(Seq(INPUT_PROPERTIES_VAR_NAME)).map(_ -> genericUri).toMap + val outputPropVars = taskPropertyVariableNames(Seq(OUTPUT_PROPERTIES_VAR_NAME)).map(_ -> genericUri).toMap + val taskProps = TaskProperties(inputPropVars, outputPropVars) + Try(renderOnce(assignments, taskProps)) match { + case Failure(exception) => + throw new ValidationException( + "The SPARQL Update template could not be rendered with example values. Error message: " + exception.getMessage, exception) + case Success(value) => value + } + } + + override def validate(variables: TemplateVariablesReader, batchSize: Option[Int]): Unit = { + if (!usesRawUnsafe) { + // Skipped for rawUnsafe templates: they can generate arbitrary SPARQL syntax so example-query validation is unreliable. + SparqlTemplate.validateParseability(generateWithDefaults(), batchSize.getOrElse(1)) + } + } + + override def inputSchema: EntitySchema = { + val properties = entityVariableNames + if (properties.isEmpty) { + EmptyEntityTable.schema + } else { + EntitySchema("", properties.map(p => UntypedPath(p).asUntypedValueType).toIndexedSeq) + } + } + + override lazy val outputSchema: EntitySchema = { + val query = QueryFactory.create(generateWithDefaults()) + if (!query.isSelectType) { + throw new ValidationException("Query is not a SELECT query!") + } + val typedPaths = query.getResultVars.asScala.map { v => + TypedPath(UntypedPath(v), ValueType.STRING, isAttribute = false) + } + EntitySchema(typeUri = Uri(""), typedPaths = typedPaths.toIndexedSeq) + } + + override def isStaticTemplate: Boolean = { + sparqlVariables match { + case Some(vars) => vars.isEmpty + case None => false + } + } + + /** SPARQL-specific method names that accept a string parameter representing an input path. 
*/ + private val sparqlMethodNames = Set("uri", "plainLiteral", "rawUnsafe", "exists") + + /** Returns SPARQL-specific variables, extracting paths from method usages. */ + private lazy val sparqlVariables: Option[Seq[TemplateVariableName]] = { + val usages = templatingVariables.flatMap(v => template.methodUsages(v)) + if (usages.nonEmpty) { + val rowVars = sparqlMethodUsages(ROW_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue)) + val inputPropVars = sparqlMethodUsages(INPUT_PROPERTIES_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, Seq(INPUT_PROPERTIES_VAR_NAME))) + val outputPropVars = sparqlMethodUsages(OUTPUT_PROPERTIES_VAR_NAME) + .map(u => new TemplateVariableName(u.parameterValue, Seq(OUTPUT_PROPERTIES_VAR_NAME))) + Some((rowVars ++ inputPropVars ++ outputPropVars).distinct) + } else { + template.variables + } + } + + /** Returns method usages on the given variable filtered to SPARQL-specific methods. */ + private def sparqlMethodUsages(variableName: String): Seq[TemplateMethodUsage] = { + template.methodUsages(variableName).filter(u => sparqlMethodNames.contains(u.methodName)) + } + + /** Checks if any SPARQL templating variable uses the rawUnsafe method. */ + private lazy val usesRawUnsafe: Boolean = { + templatingVariables.exists(varName => + sparqlMethodUsages(varName).exists(_.methodName == "rawUnsafe")) + } + + /** Returns entity variable names (those with empty scope). */ + private def entityVariableNames: Seq[String] = { + sparqlVariables match { + case Some(vars) => + vars.filter(_.scope.isEmpty).map(_.name).distinct + case None => + Seq.empty + } + } + + /** Returns variable names for a specific scope (e.g. "inputProperties", "outputProperties"). 
*/ + private def taskPropertyVariableNames(scope: Seq[String]): Seq[String] = { + sparqlVariables match { + case Some(vars) => + vars.filter(_.scope == scope).map(_.name).distinct + case None => + Seq.empty + } + } +} + +object SparqlLegacyTemplate { + + private[templating] final val ROW_VAR_NAME = "row" + private[templating] final val INPUT_PROPERTIES_VAR_NAME = "inputProperties" + private[templating] final val OUTPUT_PROPERTIES_VAR_NAME = "outputProperties" + + private final val templatingVariables = Seq(ROW_VAR_NAME, INPUT_PROPERTIES_VAR_NAME, OUTPUT_PROPERTIES_VAR_NAME) + + /** Row API. Represents a single row where input paths are either exactly one value or empty. + * + * Available in templates as the `row` variable. + * + * Examples (Velocity): + * + *
+    *   $row.uri("urn:prop:uriProp") ## Renders the value as a URI, e.g. 
+    *   $row.plainLiteral("urn:prop:stringProp") ## Renders the value as a plain literal, e.g. "Quotes \" are escaped"
+    *   $row.rawUnsafe("urn:prop:trustedValuesOnly") ## Puts the value as-is; UNSAFE, prone to injection.
+    *   #if ( $row.exists("urn:prop:valueMightNotExist") )
+    *     $row.plainLiteral("urn:prop:valueMightNotExist")
+    *   #end
+    * 
+ * + * @param inputValues Map of available input values. Paths without a value are absent. + */ + case class Row(inputValues: Map[String, String]) extends TemplateValueAccessApi { + override val templateVarName: String = ROW_VAR_NAME + } + + /** Similar to Row, but for the input task properties. */ + case class InputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { + override val templateVarName: String = INPUT_PROPERTIES_VAR_NAME + } + + /** Similar to Row, but for the output task properties. */ + case class OutputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { + override val templateVarName: String = OUTPUT_PROPERTIES_VAR_NAME + } + + /** + * Iterates over the cross-product of per-property value lists, producing one `Map[String, String]` + * per combination. Used by the legacy template engine, which renders one query per combination. + * + * Preserves the existing behavior of emitting at least one (empty) assignment, even if all value + * lists are empty. 
+ */ + private[templating] case class CrossProductIterator(values: IndexedSeq[Seq[String]], + properties: IndexedSeq[String]) extends Iterator[Map[String, String]] { + assert(values.nonEmpty) + private val sizes = values.map(_.size).toArray + // Holds the current index combination + private val indexes = new Array[Int](values.size) + private val firstNonEmptyIdx = sizes.zipWithIndex.filter(_._1 > 0).map(_._2).headOption.getOrElse(-1) // -1 if all are empty + private val lastIndex = values.size - 1 + private var first: Boolean = true // This makes sure that at least one assignment is always generated + + override def hasNext: Boolean = first || firstNonEmptyIdx > -1 && (indexes(firstNonEmptyIdx) < sizes(firstNonEmptyIdx)) + + override def next(): Map[String, String] = { + if (!hasNext) { + throw new IllegalStateException("Iterator is fully consumed and has no more values!") + } + val nextAssignment = indexes.zipWithIndex.collect { + case (valueIdx, propertyIndex) if sizes(propertyIndex) > 0 => properties(propertyIndex) -> values(propertyIndex)(valueIdx) + }.toMap + setNextIndexCombinations() + first = false + nextAssignment + } + + private def setNextIndexCombinations(): Unit = { + var idx = lastIndex + while (idx > -1) { + indexes(idx) += 1 + if (indexes(idx) >= sizes(idx) && idx != firstNonEmptyIdx) { // Do not reset the first index, because of hasNext check + indexes(idx) = 0 + idx -= 1 + } else if (idx > 0) { + for (i <- (idx + 1) to lastIndex) { // null all index values after this index + indexes(i) = 0 + } + idx = -1 + } else { + idx = -1 + } + } + } + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractor.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractor.scala new file mode 100644 index 0000000000..3851c86073 --- /dev/null +++ 
b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractor.scala @@ -0,0 +1,87 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +/** + * Best-effort heuristic that extracts the projected result variables from a SPARQL SELECT query. + * + * Unlike a full SPARQL parser, this works on raw template text that may still contain unevaluated + * placeholders (Jinja, Velocity, ...). It is used where rendering-with-defaults would fail because + * placeholders sit in contexts that do not accept a seed URI (string literals, numeric limits, etc.). + * + * Strategy: + * + * 1. Locate the first `SELECT` keyword (word-bounded, case-insensitive). + * 2. Find the end of the projection clause: the first `WHERE` / `FROM` keyword or `{`. + * 3. Strip a leading `DISTINCT` / `REDUCED`. + * 4. If the projection is `*`, fall back to collecting every distinct `?var` token in the full query. + * 5. Otherwise, walk the projection tracking parenthesis depth. At depth 0 collect `?var` directly. + * For each `( ... )` group, extract the alias of the last `AS ?alias` inside it. + * + * Returns an empty sequence when no match can be found (non-SELECT templates, or templates whose + * projection itself is produced by a placeholder). 
+ */ +object SparqlSelectVarExtractor { + + private val selectKeywordPattern = """(?i)\bSELECT\b""".r + private val whereKeywordPattern = """(?i)\bWHERE\b""".r + private val fromKeywordPattern = """(?i)\bFROM\b""".r + private val distinctReducedPattern = """(?i)^(?:DISTINCT|REDUCED)\s+""".r + private val anyVarPattern = """\?([A-Za-z_][A-Za-z0-9_]*)""".r + private val asAliasPattern = """(?i)\bAS\s+\?([A-Za-z_][A-Za-z0-9_]*)""".r + + def extractSelectVars(query: String): Seq[String] = { + selectKeywordPattern.findFirstMatchIn(query) match { + case None => Seq.empty + case Some(m) => + val afterSelect = query.substring(m.end) + val boundary = projectionBoundary(afterSelect) + val projection = distinctReducedPattern.replaceFirstIn(afterSelect.substring(0, boundary).trim, "") + if (projection.trim == "*") { + anyVarPattern.findAllMatchIn(query).map(_.group(1)).toSeq.distinct + } else { + extractProjectedVars(projection) + } + } + } + + private def projectionBoundary(afterSelect: String): Int = { + val candidates = Seq( + whereKeywordPattern.findFirstMatchIn(afterSelect).map(_.start), + fromKeywordPattern.findFirstMatchIn(afterSelect).map(_.start), + Some(afterSelect.indexOf('{')).filter(_ >= 0) + ).flatten + if (candidates.isEmpty) afterSelect.length else candidates.min + } + + private def extractProjectedVars(projection: String): Seq[String] = { + val vars = scala.collection.mutable.ArrayBuffer.empty[String] + var depth = 0 + var parenStart = 0 + var i = 0 + while (i < projection.length) { + projection.charAt(i) match { + case '(' => + if (depth == 0) parenStart = i + depth += 1 + i += 1 + case ')' => + depth -= 1 + if (depth == 0) { + val content = projection.substring(parenStart + 1, i) + asAliasPattern.findAllMatchIn(content).toSeq.lastOption.foreach(m => vars += m.group(1)) + } + i += 1 + case '?' 
if depth == 0 => + val start = i + 1 + var j = start + while (j < projection.length && isVarChar(projection.charAt(j))) j += 1 + if (j > start) vars += projection.substring(start, j) + i = j + case _ => + i += 1 + } + } + vars.toSeq.distinct + } + + private def isVarChar(c: Char): Boolean = c.isLetterOrDigit || c == '_' +} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala new file mode 100644 index 0000000000..cc7ace988e --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplate.scala @@ -0,0 +1,109 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.apache.jena.update.UpdateFactory +import org.silkframework.config.{Prefixes, Task, TaskSpec} +import org.silkframework.entity.{Entity, EntitySchema} +import org.silkframework.runtime.plugin.PluginContext +import org.silkframework.runtime.templating.{TemplateEngines, TemplateVariableValue, TemplateVariablesReader} +import org.silkframework.runtime.validation.ValidationException + +import scala.util.Try + +/** + * Compiled SPARQL template. Encapsulates rendering a SPARQL query from a template and the associated + * context (connected input/output task properties, current entity values, project/global variables). + * + * Two concrete implementations exist: + * + * - [[SparqlJinjaTemplate]] for the Jinja engine, which exposes variables as `input.config.*`, + * `input.entity.*`, `output.config.*`, `project.*` and `global.*`. + * - [[SparqlLegacyTemplate]] for the Velocity and Simple engines, which exposes the historical + * `row` / `inputProperties` / `outputProperties` object API. + */ +trait SparqlTemplate { + + /** + * Renders the template. 
+ * + * @param entity The current input entity, or `None` for static templates. + * @param taskProperties Parameter values of the connected input and output tasks. + * @param templateVariables Project and global template variables + * @return One rendered query for Jinja, or one query per cross-product combination for the legacy engine. + */ + def generate(entity: Option[Entity], + taskProperties: TaskProperties, + templateVariables: Seq[TemplateVariableValue] = Seq.empty): Iterable[String] + + /** Validates the template and, if batchSize > 1, that batching produces valid SPARQL. */ + def validate(variables: TemplateVariablesReader, batchSize: Option[Int]): Unit + + /** Entity schema that the template expects on its input port. */ + def inputSchema: EntitySchema + + /** Output schema projected by a SELECT query. Unused for UPDATE templates. */ + def outputSchema: EntitySchema + + /** True if the template does not reference any entity values and thus needs no input port. */ + def isStaticTemplate: Boolean +} + +object SparqlTemplate { + + /** + * Creates a SPARQL template using the given template engine. + * + * @param defaultScope Scope whose variables are additionally exposed at the top level of the Jinja context, + * so templates may reference them without the scope prefix. Only honored by the Jinja + * implementation. Pass `Seq.empty` to disable aliasing. 
+ */ + def create(templateEngineId: String, template: String, defaultScope: Seq[String] = Seq.empty): SparqlTemplate = { + if (templateEngineId == SparqlJinjaTemplate.JINJA_ENGINE_ID) { + new SparqlJinjaTemplate(template, defaultScope) + } else { + val compiled = TemplateEngines.create(templateEngineId).compile(template) + new SparqlLegacyTemplate(compiled) + } + } + + /** + * Verifies that a rendered example query parses as SPARQL Update, and — when batchSize > 1 — that two + * consecutive copies also parse (so batching in [[org.silkframework.plugins.dataset.rdf.executors.BatchSparqlUpdateEmitter]] + * produces valid queries). + */ + private[templating] def validateParseability(query: String, batchSize: Int): Unit = { + Try(UpdateFactory.create(query)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + + parseError.getMessage + ", example query: " + query) + } + if (batchSize > 1) { + val batchSparql = query + "\n" + query + Try(UpdateFactory.create(batchSparql)).failed.toOption.foreach { parseError => + throw new ValidationException( + "The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + + parseError.getMessage + ", example batch query: " + batchSparql) + } + } + } +} + +/** Makes properties of the input and output task of a SPARQL operator execution available to the template. 
*/ +case class TaskProperties(inputTask: Map[String, String], outputTask: Map[String, String]) + +object TaskProperties { + + def create(inputTask: Option[Task[_ <: TaskSpec]], + outputTask: Option[Task[_ <: TaskSpec]], + pluginContext: PluginContext): TaskProperties = { + // It's obligatory to have empty prefixes here, since we do not want to have prefixed URIs for URI parameters + implicit val updatedPluginContext: PluginContext = PluginContext.updatedPluginContext(pluginContext, prefixes = Some(Prefixes.empty)) + val inputProperties = createTaskProperties(inputTask) + val outputProperties = createTaskProperties(outputTask) + TaskProperties(inputProperties, outputProperties) + } + + private def createTaskProperties(task: Option[Task[_ <: TaskSpec]]) + (implicit pluginContext: PluginContext): Map[String, String] = { + task.toSeq.flatMap(_.parameters.toStringMap).toMap + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala deleted file mode 100644 index cc6a797589..0000000000 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocity.scala +++ /dev/null @@ -1,137 +0,0 @@ -package org.silkframework.plugins.dataset.rdf.tasks.templating -import org.apache.jena.update.UpdateFactory -import org.apache.velocity.runtime.parser.node._ -import org.silkframework.entity.EntitySchema -import org.silkframework.entity.paths.UntypedPath -import org.silkframework.execution.local.EmptyEntityTable -import org.silkframework.runtime.validation.ValidationException - -import scala.util.{Failure, Success, Try} - -/** - * A SPARQL Update templating engine based on Velocity. 
- */ -case class SparqlTemplatingEngineVelocity(sparqlUpdateTemplate: String, batchSize: Int) extends SparqlUpdateTemplatingEngine { - private val sparqlTemplate = SparqlVelocityTemplating.createTemplate(sparqlUpdateTemplate) - - override def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { - SparqlVelocityTemplating.renderTemplate(sparqlTemplate, Row(placeholderAssignments), taskProperties) - } - - override def validate(): Unit = { - // We cannot generate meaningful example values for the template if $row.rawUnsafe() is used, because it could generate arbitrary SPARQL syntax. - if(!usesRawUnsafe()) { - // Generate example input assignments - val genericUri = "urn:generic:1" // Valid URI string is valid in URI and literal position, so use always the same URI - val assignments = inputPaths().map(p => (p, genericUri)).toMap - val inputPropertiesAssignments = variableMethodUsages(SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME).map(_.parameterValue -> genericUri).toMap - val outputPropertiesAssignments = variableMethodUsages(SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME).map(_.parameterValue -> genericUri).toMap - // Generate SPARQL Update query with example assignments - val sparqlQuery = Try(generate(assignments, TaskProperties(inputPropertiesAssignments, outputPropertiesAssignments))) match { - case Failure(exception) => - throw new ValidationException("The SPARQL Update template could not be rendered with example value. Error message: " + exception.getMessage, exception) - case Success(value) => value - } - // Validate generated SPARQL Update query - Try(UpdateFactory.create(sparqlQuery)).failed.toOption foreach { parseError => - throw new ValidationException("The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + - parseError.getMessage + ", example query: " + sparqlQuery) - } - // If queries should be batched, also check if queries can be batched, i.e. 
concatenated and still have valid syntax - if (batchSize > 1) { - val batchSparql = sparqlQuery + "\n" + sparqlQuery - Try(UpdateFactory.create(batchSparql)).failed.toOption foreach { parseError => - throw new ValidationException("The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. Error message: " + - parseError.getMessage + ", example batch query: " + batchSparql) - } - } - } - } - - override def inputSchema: EntitySchema = { - val properties = inputPaths() - if (properties.isEmpty) { - EmptyEntityTable.schema // Static template, no input data needed - } else { - EntitySchema("", properties.map(p => UntypedPath(p).asUntypedValueType).toIndexedSeq) - } - } - - def inputPaths(): Seq[String] = { - variableMethodUsages(SparqlVelocityTemplating.ROW_VAR_NAME).map(_.parameterValue).distinct - } - - // Extracts all method invocations on the given variable name in the config - def variableMethodUsages(variableName: String): Seq[TemplateVariableMethodUsage] = { - sparqlTemplate.getData match { - case simpleNode: SimpleNode => - // This should always be the case - retrieveRowMethodUsages(simpleNode, variableName) - case None => - throw new RuntimeException(s"Unexpected error: Cannot retrieve $variableName object method usages from Velocity template.") - } - } - - private def usesRawUnsafe(): Boolean = { - SparqlVelocityTemplating.templatingVariables.exists { variableName => - variableMethodUsages(variableName).exists(_.rowMethod == rawUnsafeMethodName) - } - } - - private val rawUnsafeMethodName = "rawUnsafe" - - final val rowMethodsWithPathParameter = Set("uri", "plainLiteral", rawUnsafeMethodName, "exists") - /** Retrieves the input paths that are used via the [[Row]] API. 
*/ - private def retrieveRowMethodUsages(simpleNode: Node, varName: String): List[TemplateVariableMethodUsage] = { - simpleNode match { - case astMethod: ASTMethod => - astReferenceName(astMethod.jjtGetParent()) match { - case Some(v) if v == varName && - rowMethodsWithPathParameter.contains(astMethod.getMethodName) && - validStringRowMethodParameter(astMethod) => - // Collect parameter values from the specified methods of the 'row' object, since only these must all be input paths. - val parameterValue = astMethod.jjtGetChild(1).jjtGetChild(0).asInstanceOf[ASTStringLiteral].literal().stripPrefix("\"").stripSuffix("\"") - List(TemplateVariableMethodUsage(astMethod.getMethodName, parameterValue)) - case _ => - List.empty - } - case other: SimpleNode => - retrieveChildRowMethodUsages(other, varName) - } - } - - // Make sure that there is a single string constant as parameter - private def validStringRowMethodParameter(astMethod: ASTMethod): Boolean = { - astMethod.jjtGetNumChildren() == 2 && { - val parameter = astMethod.jjtGetChild(1) - parameter.isInstanceOf[ASTExpression] && - parameter.jjtGetNumChildren() == 1 && - parameter.jjtGetChild(0).isInstanceOf[ASTStringLiteral] && - parameter.jjtGetChild(0).asInstanceOf[ASTStringLiteral].isConstant - } - } - - case class TemplateVariableMethodUsage(rowMethod: String, parameterValue: String) - - private def astReferenceName(node: Node): Option[String] = { - node match { - case reference: ASTReference => - Some(reference.getRootString) - case _ => - None - } - } - - private def retrieveChildRowMethodUsages(other: SimpleNode, varName: String): List[TemplateVariableMethodUsage] = { - val childPaths = for (idx <- 0 until other.jjtGetNumChildren()) yield { - retrieveRowMethodUsages(other.jjtGetChild(idx), varName) - } - childPaths.fold(List.empty[TemplateVariableMethodUsage])((a, b) => a ::: b) - } - - override def isStaticTemplate: Boolean = { - SparqlVelocityTemplating.templatingVariables.forall { variableName => - 
variableMethodUsages(variableName).isEmpty - } - } -} diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngine.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngine.scala deleted file mode 100644 index 55b8b20d3f..0000000000 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngine.scala +++ /dev/null @@ -1,28 +0,0 @@ -package org.silkframework.plugins.dataset.rdf.tasks.templating - -import org.silkframework.entity.EntitySchema - -/** - * Trait that every SPARQL Update templating engine must implement. - */ -trait SparqlUpdateTemplatingEngine { - /** - * Renders the template based on the variable assignments. - */ - def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String - - /** Validates the template */ - def validate(): Unit - - /** The input entity schema that is expected by the template. */ - def inputSchema: EntitySchema - - /** The SPARQL Update template that will be rendered on every generate call */ - def sparqlUpdateTemplate: String - - /** True is the given template is static, i.e. contains no placeholder variables */ - def isStaticTemplate: Boolean -} - -/** Makes properties of the input and output task of a SPARQL Update operator execution available. 
*/ -case class TaskProperties(inputTask: Map[String, String], outputTask: Map[String, String]) \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala index f5f3e373b9..a6e1668ccc 100644 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingEngineSimple.scala @@ -1,57 +1,58 @@ package org.silkframework.plugins.dataset.rdf.tasks.templating import org.apache.jena.graph.NodeFactory -import org.apache.jena.update.UpdateFactory -import org.silkframework.entity.EntitySchema -import org.silkframework.entity.paths.UntypedPath -import org.silkframework.execution.local.EmptyEntityTable import org.silkframework.rule.util.JenaSerializationUtil +import org.silkframework.runtime.plugin.annotations.Plugin +import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateVariableName, TemplateVariableValue} import org.silkframework.runtime.validation.ValidationException import org.silkframework.util.Uri +import java.io.Writer import scala.collection.mutable.ArrayBuffer -import scala.util.Try import scala.util.matching.Regex +/** + * A simple templating engine that supports plain literal and URI placeholders. + */ +@Plugin( + id = SparqlSimpleTemplateEngine.id, + label = "Simple (deprecated)", + description = "A simple templating engine that supports plain literal and URI placeholders.", + deprecation = "This template engine is deprecated. Please use the 'Jinja' template engine instead." 
+) +case class SparqlSimpleTemplateEngine() extends TemplateEngine { + + override def compile(templateString: String): SparqlSimpleCompiledTemplate = { + new SparqlSimpleCompiledTemplate(templateString.replace("\r\n", "\n")) + } +} + +object SparqlSimpleTemplateEngine { + final val id = "simple" +} /** - * A simple templating engine that can only render plain literals and URIs. + * A compiled simple SPARQL Update template that can only render plain literals and URIs. * Example: * * DELETE DATA { ${} rdf:label ${"PROP_FROM_ENTITY_SCHEMA2"} } */ -case class SparqlUpdateTemplatingEngineSimple(sparqlUpdateTemplate: String, batchSize: Int) extends SparqlUpdateTemplatingEngine { - /** Validate the generated SPARQL of the template and check for batch execution characteristics */ - override def validate(): Unit = { - val sparql = (sparqlUpdateTemplateParts map { - case SparqlUpdateTemplatePlainLiteralPlaceholder(prop) => - validateUri(prop) - "\"placeholder value\"" - case SparqlUpdateTemplateURIPlaceholder(prop) => - validateUri(prop) - "" - case SparqlUpdateTemplateStaticPart(partialSparql) => - partialSparql - }).mkString - Try(UpdateFactory.create(sparql)).failed.toOption foreach { parseError => - throw new ValidationException("The SPARQL Update template does not generate valid SPARQL Update queries. Error message: " + - parseError.getMessage + ", example query: " + sparql) - } - if(batchSize > 1) { - val batchSparql = sparql + "\n" + sparql - Try(UpdateFactory.create(batchSparql)).failed.toOption foreach { parseError => - throw new ValidationException("The SPARQL Update template cannot be batched processed. There is probably a ';' missing at the end. 
Error message: " + - parseError.getMessage + ", example batch query: " + batchSparql) - } - } +class SparqlSimpleCompiledTemplate(val sparqlUpdateTemplate: String) extends CompiledTemplate { + + override lazy val variables: Option[Seq[TemplateVariableName]] = Some( + properties.map(p => new TemplateVariableName(p, Seq.empty)) + ) + + override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { + val stringValues = values.map { case (k, v) => k -> String.valueOf(v) } + writer.write(render(stringValues)) + } + + override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { + evaluate(convertValues(values), writer) } - /** - * Generates The SPARQL Update query based on the placeholder assignments. - * @param placeholderAssignments For each placeholder in the query template - * @return - */ - override def generate(placeholderAssignments: Map[String, String], taskProperties: TaskProperties): String = { + private def render(placeholderAssignments: Map[String, String]): String = { def assignmentValue(prop: String): String = placeholderAssignments.get(prop) match { case Some(value) => value case None => throw new ValidationException(s"No value assignment for placeholder property $prop") @@ -135,22 +136,10 @@ case class SparqlUpdateTemplatingEngineSimple(sparqlUpdateTemplate: String, batc templateParts.toSeq } - override def inputSchema: EntitySchema = { - if (isStaticTemplate) { - EmptyEntityTable.schema // Static template, no input data needed - } else { - EntitySchema("", properties.map(p => UntypedPath(p).asUntypedValueType).toIndexedSeq) - } - } - private val properties: Seq[String] = sparqlUpdateTemplateParts. filter(_.isInstanceOf[SparqlUpdateTemplatePlaceholder]). map(_.asInstanceOf[SparqlUpdateTemplatePlaceholder].prop). 
distinct - - override def isStaticTemplate: Boolean = { - properties.isEmpty - } } sealed trait SparqlUpdateTemplatePart @@ -170,4 +159,4 @@ case class SparqlUpdateTemplateURIPlaceholder(prop: String) extends SparqlUpdate case class SparqlUpdateTemplatePlainLiteralPlaceholder(prop: String) extends SparqlUpdateTemplatePlaceholder /** Static SPARQL update query part */ -case class SparqlUpdateTemplateStaticPart(queryPart: String) extends SparqlUpdateTemplatePart \ No newline at end of file +case class SparqlUpdateTemplateStaticPart(queryPart: String) extends SparqlUpdateTemplatePart diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingMode.java b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingMode.java deleted file mode 100644 index 48c9cbaf73..0000000000 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlUpdateTemplatingMode.java +++ /dev/null @@ -1,27 +0,0 @@ -package org.silkframework.plugins.dataset.rdf.tasks.templating; - -import org.silkframework.runtime.plugin.types.EnumerationParameterType; - -/** - * - */ -public enum SparqlUpdateTemplatingMode implements EnumerationParameterType { - simple("simple", "Simple"), - velocity("velocity", "Velocity Engine"); - - private String id; - private String displayName; - - SparqlUpdateTemplatingMode(String id, String displayName) { - this.id = id; - this.displayName = displayName; - } - - public String id() { - return id; - } - - public String displayName() { - return displayName; - } -} \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlVelocityTemplating.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlVelocityTemplating.scala deleted file mode 
100644 index bb0e9a334a..0000000000 --- a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlVelocityTemplating.scala +++ /dev/null @@ -1,153 +0,0 @@ -package org.silkframework.plugins.dataset.rdf.tasks.templating - -import java.io.{StringReader, StringWriter} -import java.net.URI - -import org.apache.jena.graph.NodeFactory -import org.apache.velocity.context.Context -import org.apache.velocity.exception.MethodInvocationException -import org.apache.velocity.runtime.RuntimeSingleton -import org.apache.velocity.{Template, VelocityContext} -import org.silkframework.rule.util.JenaSerializationUtil - -import scala.util.Try - -/** - * Templating engine für SPARQL queries. - * Based on the Apache Velocity engine. - * - * @see See [[https://velocity.apache.org/]] for more information. - */ -object SparqlVelocityTemplating { - final val ROW_VAR_NAME = "row" - final val INPUT_PROPERTIES_VAR_NAME = "inputProperties" - final val OUTPUT_PROPERTIES_VAR_NAME = "outputProperties" - - final val templatingVariables = Seq(ROW_VAR_NAME, INPUT_PROPERTIES_VAR_NAME, OUTPUT_PROPERTIES_VAR_NAME) - - /** Creates a Velocity template based on the given template string. */ - def createTemplate(sparqlTemplate: String): Template = { - val service = RuntimeSingleton.getRuntimeServices - service.addProperty("runtime.strict_mode.enable", true) // This should fail if it cannot replace variables with input values. 
- val reader = new StringReader(sparqlTemplate) - val template = new Template() - template.setRuntimeServices(service) - template.setData(service.parse(reader, template)) - template.initDocument() - template - } - - /** Renders the template with the given context */ - def renderTemplate(template: Template, context: Context): String = { - val writer = new StringWriter() - template.merge(context, writer) - writer.toString - } - - def renderTemplate(template: Template, row: Row, taskProperties: TaskProperties): String = { - try { - val context = new VelocityContext() - context.put(ROW_VAR_NAME, row) - context.put(INPUT_PROPERTIES_VAR_NAME, InputProperties(taskProperties.inputTask)) - context.put(OUTPUT_PROPERTIES_VAR_NAME, OutputProperties(taskProperties.outputTask)) - renderTemplate(template, context) - } catch { - case ex: MethodInvocationException => - val adaptedMessage = prettifyExceptionMessage(Option(ex.getMessage).getOrElse("")) - throw TemplateExecutionException("Template could not be rendered. Error detail: " + adaptedMessage, ex) - } - } - - private def prettifyExceptionMessage(errorMessage: String): String = { - var replacement = errorMessage. - replace("java.lang.String", "String"). - replace("", ""). - replace("threw exception org.silkframework.plugins.dataset.rdf.tasks.templating.TemplateExecutionException", "has failed with error message") - for((className, varName) <- Seq(("Row", ROW_VAR_NAME), ("InputProperties", INPUT_PROPERTIES_VAR_NAME), ("OutputProperties", OUTPUT_PROPERTIES_VAR_NAME))) { - replacement = replacement.replace(s"Object 'org.silkframework.plugins.dataset.rdf.sparql.$className'", varName). - replace(s"in class org.silkframework.plugins.dataset.rdf.tasks.templating.$className" , s"of $$$varName object") - } - replacement - } -} - -/** Row API used in SPARQL templates. Represents a single row where input paths are either exactly one value or empty. - * - * The Row object will be available in Velocity templates as 'row' variable. 
- * - * Examples: - * - *
-  *   $row.uri("urn:prop:uriProp") ## Renders the value of the input path as URI, e.g. 
-  *   $row.plainLiteral("urn:prop:stringProp") ## Renders the value of the input paths as plain string, e.g. "Quotes \" are escaped"
-  *   $row.rawUnsafe("urn:prop:trustedValuesOnly") ## Puts the value as it is into the rendered template. This is UNSAFE and prone to injection attacks.
-  *   #if ( $row.exists("urn:prop:valueMightNotExist") ) ## Checks if a value exists for the input path, i.e. values can always be optional.
-  *     $row.plainLiteral("urn:prop:valueMightNotExist") ## If no value exists for the input path then this would throw an exception
-  *   #end
-  * 
- * - * @param inputValues The map of existing input values, i.e. values that were defined by input paths, but where no value was available are not set. - */ -case class Row(inputValues: Map[String, String]) extends TemplateValueAccessApi { - override val templateVarName: String = SparqlVelocityTemplating.ROW_VAR_NAME -} - -/** Similar to Row, but for the input task properties. */ -case class InputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { - override val templateVarName: String = SparqlVelocityTemplating.INPUT_PROPERTIES_VAR_NAME -} - -/** Similar to Row, but for the output task properties. */ -case class OutputProperties(inputValues: Map[String, String]) extends TemplateValueAccessApi { - override val templateVarName: String = SparqlVelocityTemplating.OUTPUT_PROPERTIES_VAR_NAME -} - -/** API used in templates to access all kinds of input values. Represents a key value object where input values are either exactly one value or empty/not defined. - * - * See [[Row]] for examples. - * - */ -trait TemplateValueAccessApi { - def inputValues: Map[String, String] - - def templateVarName: String - - /** Returns the value for a specific input path as URI, i.e. 
<...> */ - def uri(inputPath: String): String = { - val value = objectValue(inputPath) - if(Try(new URI(value)).isFailure) { - throw TemplateExecutionException(s"Value for input path '$inputPath' is not a valid URI: '$value'") - } - val uriNode = NodeFactory.createURI(value) - JenaSerializationUtil.serializeSingleNode(uriNode) - } - - /** Checks if a value for the provided input path exists */ - def exists(inputPath: String): Boolean = { - inputValues.contains(inputPath) - } - - private def objectValue(inputPath: String): String = { - inputValues.get(inputPath) match { - case Some(value) => - value - case None => - throw TemplateExecutionException(s"Input path '$inputPath' did not exist in $$$templateVarName.") - } - } - - /** Returns the value for a specific input path as SPARQL plain literal, i.e. "..." */ - def plainLiteral(inputPath: String): String = { - val value = objectValue(inputPath) - val uriNode = NodeFactory.createLiteral(value) - JenaSerializationUtil.serializeSingleNode(uriNode) - } - - /** Puts the value of the input path as raw string into the rendered template. - * This can be UNSAFE and should never be used when the input data comes from untrusted sources. 
*/ - def rawUnsafe(inputPath: String): String = { - objectValue(inputPath) - } -} - -case class TemplateExecutionException(message: String, cause: Throwable = null) extends RuntimeException(message, cause) \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/TemplateValueAccessApi.scala b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/TemplateValueAccessApi.scala new file mode 100644 index 0000000000..7a4886f2ba --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/main/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/TemplateValueAccessApi.scala @@ -0,0 +1,59 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import java.net.URI + +import org.apache.jena.graph.NodeFactory +import org.silkframework.rule.util.JenaSerializationUtil +import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException + +import scala.util.Try + + +/** + * API used in templates to access all kinds of input values. Represents a key value object where input values are either exactly one value or empty/not defined. + * + * See [[Row]] for examples. + * + */ +trait TemplateValueAccessApi { + def inputValues: Map[String, String] + + def templateVarName: String + + /** Returns the value for a specific input path as URI, i.e. 
<...> */ + def uri(inputPath: String): String = { + val value = objectValue(inputPath) + if(Try(new URI(value)).isFailure) { + throw new TemplateEvaluationException(s"Value for input path '$inputPath' is not a valid URI: '$value'") + } + val uriNode = NodeFactory.createURI(value) + JenaSerializationUtil.serializeSingleNode(uriNode) + } + + /** Checks if a value for the provided input path exists */ + def exists(inputPath: String): Boolean = { + inputValues.contains(inputPath) + } + + private def objectValue(inputPath: String): String = { + inputValues.get(inputPath) match { + case Some(value) => + value + case None => + throw new TemplateEvaluationException(s"Input path '$inputPath' did not exist in $$$templateVarName.") + } + } + + /** Returns the value for a specific input path as SPARQL plain literal, i.e. "..." */ + def plainLiteral(inputPath: String): String = { + val value = objectValue(inputPath) + val uriNode = NodeFactory.createLiteral(value) + JenaSerializationUtil.serializeSingleNode(uriNode) + } + + /** Puts the value of the input path as raw string into the rendered template. + * This can be UNSAFE and should never be used when the input data comes from untrusted sources. 
*/ + def rawUnsafe(inputPath: String): String = { + objectValue(inputPath) + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/test/resources/org/silkframework/plugins/dataset/rdf/sparqlUpdateProject.zip b/silk-plugins/silk-plugins-rdf/src/test/resources/org/silkframework/plugins/dataset/rdf/sparqlUpdateProject.zip index b74b7570d3..a028a08b33 100644 Binary files a/silk-plugins/silk-plugins-rdf/src/test/resources/org/silkframework/plugins/dataset/rdf/sparqlUpdateProject.zip and b/silk-plugins/silk-plugins-rdf/src/test/resources/org/silkframework/plugins/dataset/rdf/sparqlUpdateProject.zip differ diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala index aa9477b361..5bddfe8108 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/LocalSparqlUpdateExecutorTest.scala @@ -1,22 +1,23 @@ package org.silkframework.plugins.dataset.rdf -import org.silkframework.config.{CustomTask, FixedNumberOfInputs, InputPorts, PlainTask, Port, Prefixes, Task} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.must.Matchers +import org.silkframework.config._ import org.silkframework.entity._ import org.silkframework.entity.paths.{TypedPath, UntypedPath} -import org.silkframework.execution.{ExecutionReport, ExecutorOutput} import org.silkframework.execution.local.{GenericEntityTable, LocalEntities, LocalExecution} +import org.silkframework.execution.typed.SparqlUpdateEntitySchema +import org.silkframework.execution.{ExecutionReport, ExecutorOutput} import org.silkframework.plugins.dataset.rdf.executors.LocalSparqlUpdateExecutor import org.silkframework.plugins.dataset.rdf.tasks.SparqlUpdateCustomTask 
-import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlUpdateTemplatingMode +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlSimpleTemplateEngine +import org.silkframework.plugins.templating.velocity.VelocityTemplateEngine import org.silkframework.runtime.activity.{ActivityContext, UserContext} import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, TestPluginContext} import org.silkframework.runtime.validation.ValidationException import org.silkframework.util.{Identifier, TestMocks} import org.silkframework.workspace.TestWorkspaceProviderTestTrait -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.must.Matchers -import org.silkframework.execution.typed.SparqlUpdateEntitySchema class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestWorkspaceProviderTestTrait { behavior of "Local SPARQL Update Executor" @@ -30,11 +31,8 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestW private val batchSize = 5 private val sparqlUpdateTemplate = s"""INSERT DATA { $${} $${"v"} } ;""" private val schema = EntitySchema("", typedPaths = IndexedSeq(TypedPath("s", ValueType.URI), TypedPath("v", ValueType.STRING))) - private val notIncluded = "NOT_INCLUDED" private val inputEntities: Seq[Entity] = Seq( Entity("http://example.org/entity/1", IndexedSeq(Seq("http://s1"), Seq("s1a", "s1b")), schema), - Entity("http://example.org/entity/2", IndexedSeq(Seq(s"http://$notIncluded"), Seq()), schema), - Entity("http://example.org/entity/3", IndexedSeq(Seq(), Seq(notIncluded)), schema), Entity("http://example.org/entity/4", IndexedSeq(Seq("http://s2a", "http://s2b"), Seq("s2a", "s2b", "s2c")), schema) ) private def mockInputTable(properties: Seq[(String, String)] = Seq.empty, @@ -78,6 +76,17 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestW samplesEntities.entities.head.values.head.head must startWith ("""INSERT DATA { "s1a" } ;""") } + 
it should "fail when an input entity is missing a value referenced by the template" in { + val incompleteEntities = Seq( + Entity("http://example.org/entity/incomplete", IndexedSeq(Seq("http://s1"), Seq()), schema) + ) + val inputTaskMock = PlainTask("mockTask", new DummyTaskSpec(Map.empty)) + val input = Seq(GenericEntityTable(incompleteEntities, schema, inputTaskMock)) + intercept[ValidationException] { + executeTask(sparqlUpdateTemplate, input).entities.toList + } + } + it should "throw validation exception if an invalid input schema is found" in { val invalidSchema = EntitySchema("", typedPaths = IndexedSeq("s", "wrong").map(UntypedPath(_).asUntypedValueType)) val input = Seq(mockInputTable(schema = invalidSchema)) @@ -96,7 +105,7 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestW it should "output one UPDATE query per input task when the template contains input property placeholders" in { val templateWithInputPropertyPlaceholders = """INSERT DATA { $inputProperties.uri("graph") $inputProperties.plainLiteral("graph") };""" val result = executeTask(templateWithInputPropertyPlaceholders, Seq(mockInputTable(Seq("graph" -> "g1")), - mockInputTable(Seq("graph" -> "g2"))), SparqlUpdateTemplatingMode.velocity) + mockInputTable(Seq("graph" -> "g2"))), VelocityTemplateEngine.id) result.entities.map(_.values.flatten.head).toList mustBe List("INSERT DATA { \"g1\" };\n" + "INSERT DATA { \"g2\" };") } @@ -104,20 +113,20 @@ class LocalSparqlUpdateExecutorTest extends AnyFlatSpec with Matchers with TestW it should "output one UPDATE query overall even for multiple inputs when no placeholder is used at all" in { val staticTemplate = """INSERT DATA { "1" };""" val result = executeTask(staticTemplate, Seq(mockInputTable(Seq("graph" -> "g1")), - mockInputTable(Seq("graph" -> "g2"))), SparqlUpdateTemplatingMode.velocity) + mockInputTable(Seq("graph" -> "g2"))), VelocityTemplateEngine.id) result.entities.map(_.values.flatten.head).toList mustBe 
List(staticTemplate) } private def sparqlUpdateTask(template: String, - mode: SparqlUpdateTemplatingMode): Task[SparqlUpdateCustomTask] = { - project.updateTask("task", SparqlUpdateCustomTask(template, batchSize = batchSize, templatingMode = mode)) + language: String): Task[SparqlUpdateCustomTask] = { + project.updateTask("task", SparqlUpdateCustomTask(template, batchSize = batchSize, templatingMode = language)) } private def executeTask(template: String, input: Seq[GenericEntityTable], - mode: SparqlUpdateTemplatingMode = SparqlUpdateTemplatingMode.simple, + language: String = SparqlSimpleTemplateEngine.id, activityContext: ActivityContext[ExecutionReport] = context): LocalEntities = { - val result = executor.execute(sparqlUpdateTask(template, mode), input, ExecutorOutput.empty, LocalExecution(true), activityContext) + val result = executor.execute(sparqlUpdateTask(template, language), input, ExecutorOutput.empty, LocalExecution(true), activityContext) result mustBe defined result.get } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala index 609e07ae1a..804a65d179 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTaskIntegrationTest.scala @@ -23,7 +23,11 @@ class SparqlUpdateTaskIntegrationTest extends AnyFlatSpec with Matchers with Sin // Uses Velocity templating mode ("Velocity", "workflowVelocity", "outputVelocity.csv", identity), // Uses Velocity templating mode and accessed input and output task properties - ("Velocity with task properties", "workflowVelocityTaskProperties", "outputVelocity.csv", taskPropertyConcatenate) // + ("Velocity with task properties", 
"workflowVelocityTaskProperties", "outputVelocity.csv", taskPropertyConcatenate), + // Uses Jinja templating mode + ("Jinja", "workflowJinja", "outputJinja.csv", identity), + // Uses Jinja templating mode and accessed input and output task properties + ("Jinja with task properties", "workflowJinjaTaskProperties", "outputJinja.csv", taskPropertyConcatenate) )) { it should s"generate the correct result in '$templatingMode' templating mode" in { executeWorkflow(workflowId) @@ -49,7 +53,7 @@ class SparqlUpdateTaskIntegrationTest extends AnyFlatSpec with Matchers with Sin ) val sparqlSelectTaskReport = taskReports(4).report sparqlSelectTaskReport.entityCount mustBe 8 - sparqlSelectTaskReport.summary.filter(r => r._1 == "No. of rows processed").map(_._2) mustBe Seq("8") + sparqlSelectTaskReport.summary.filter(r => r._1.startsWith("No. of rows processed")).map(_._2) mustBe Seq("8") val sparqlUpdateTaskReport = taskReports(5).report // Batch size is set to 2, so half the number of the SPARQL Select task diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala index 8dcbd85e13..6dc6b2a85c 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/SparqlUpdateTemplatingEngineSimpleTest.scala @@ -1,12 +1,15 @@ package org.silkframework.plugins.dataset.rdf +import org.silkframework.entity.paths.UntypedPath +import org.silkframework.entity.{Entity, EntitySchema} import org.silkframework.plugins.dataset.rdf.tasks._ import org.silkframework.plugins.dataset.rdf.tasks.templating._ import org.silkframework.runtime.validation.ValidationException import org.scalatest.flatspec.AnyFlatSpec import 
org.scalatest.matchers.must.Matchers import org.silkframework.config.{FixedNumberOfInputs, FixedSchemaPort} +import org.silkframework.runtime.templating.{InMemoryTemplateVariablesReader, TemplateVariables} class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { behavior of "SPARQL Update Simple Templating Engine" @@ -56,7 +59,7 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { } it should "generate the correct input schema from the template" in { - SparqlUpdateCustomTask(sparqlUpdateTemplate).inputPorts match { + SparqlUpdateCustomTask(sparqlUpdateTemplate, templatingMode = SparqlSimpleTemplateEngine.id).inputPorts match { case FixedNumberOfInputs(Seq(FixedSchemaPort(schema))) => schema.typedPaths.flatMap(_.propertyUri).map(_.uri).toSet mustBe Set( "PROP_FROM_ENTITY_SCHEMA1", @@ -69,21 +72,29 @@ class SparqlUpdateTemplatingEngineSimpleTest extends AnyFlatSpec with Matchers { } it should "generate the correct SPARQL Update query from the template" in { - SparqlUpdateCustomTask(sparqlUpdateTemplate).generate(Map( + val bindings = Map( "PROP_FROM_ENTITY_SCHEMA1" -> "urn:some:uri", "PROP_FROM_ENTITY_SCHEMA2" -> "the old label", "PROP_FROM_ENTITY_SCHEMA3" -> """The new |label with some "'weird characters""".stripMargin - ), TaskProperties(Map.empty, Map.empty)) mustBe + ) + SparqlUpdateCustomTask(sparqlUpdateTemplate, templatingMode = SparqlSimpleTemplateEngine.id) + .compiledTemplate.generate(Some(entityFromMap(bindings)), TaskProperties(Map.empty, Map.empty)).head mustBe """PREFIX rdf: |DELETE DATA { rdf:label "the old label" } ; | INSERT DATA { rdf:label "The new\nlabel with some \"'weird characters" } ;""".stripMargin } + private def entityFromMap(values: Map[String, String]): Entity = { + val entries = values.toIndexedSeq + val schema = EntitySchema("", entries.map { case (k, _) => UntypedPath(k).asUntypedValueType }) + Entity("urn:test", entries.map { case (_, v) => Seq(v) }, schema) + } + def 
parse(sparqlUpdateTemplate: String, batchSize: Int = 2): Seq[SparqlUpdateTemplatePart] = { - val engine = SparqlUpdateTemplatingEngineSimple(sparqlUpdateTemplate, batchSize) - engine.validate() - engine.sparqlUpdateTemplateParts + val compiled = SparqlSimpleTemplateEngine().compile(sparqlUpdateTemplate) + new SparqlLegacyTemplate(compiled).validate(InMemoryTemplateVariablesReader(TemplateVariables.empty, Set.empty), Some(batchSize)) + compiled.sparqlUpdateTemplateParts } } diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala index 3a1e70e257..daf70b9be2 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/executors/LocalSparqlSelectExecutorTest.scala @@ -3,13 +3,18 @@ package org.silkframework.plugins.dataset.rdf.executors import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers -import org.silkframework.config.PlainTask +import org.silkframework.config.{PlainTask, Prefixes, Task} import org.silkframework.dataset.rdf._ -import org.silkframework.entity.Entity +import org.silkframework.dataset.{DataSource, DatasetSpec, EntitySink, LinkSink} +import org.silkframework.entity.paths.TypedPath +import org.silkframework.entity.{Entity, EntitySchema, ValueType} +import org.silkframework.execution.ReportingIterator +import org.silkframework.execution.local.GenericEntityTable import org.silkframework.plugins.dataset.rdf.tasks.SparqlSelectCustomTask import org.silkframework.runtime.activity.{TestUserContextTrait, UserContext} import org.silkframework.runtime.iterator.{CloseableIterator, TraversableIterator} -import 
org.silkframework.runtime.plugin.PluginContext +import org.silkframework.runtime.plugin.{ParameterValues, PluginContext} +import org.silkframework.runtime.templating.exceptions.UnboundVariablesException import org.silkframework.util.{MockitoSugar, TestMocks} import scala.collection.immutable.SortedMap @@ -29,8 +34,8 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec val task = SparqlSelectCustomTask("SELECT * WHERE {?s ?p ?o}") val reportUpdater = SparqlSelectExecutionReportUpdater(PlainTask("task", task), activityContextMock) val sparqlEndpoint = new SparqlEndpoint { - override def sparqlParams: SparqlParams = ??? - override def withSparqlParams(sparqlParams: SparqlParams): SparqlEndpoint = ??? + override def sparqlParams: SparqlParams = SparqlParams() + override def withSparqlParams(sparqlParams: SparqlParams): SparqlEndpoint = this override def select(query: String, limit: Int)(implicit userContext: UserContext): SparqlResults = { val entities = for(i <- Iterator.range(0, limit)) yield { @@ -42,7 +47,7 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec } Entity.empty("") // Make sure that Entity class is loaded val start = System.currentTimeMillis() - val entities = new LocalSparqlSelectIterator(task, sparqlEndpoint, executionReportUpdater = Some(reportUpdater)) + val entities = LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint), None, executionReportUpdater = Some(reportUpdater)) val entity = entities.head entity.values.flatten.head mustBe "subject 0" (System.currentTimeMillis() - start).toInt must be < quickReactionTime @@ -58,14 +63,101 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec correctTimeout = endpoint.sparqlParams.timeout.contains(timeout) }) val limit = 1000 * 1000 * 1000 - val entities = new LocalSparqlSelectIterator(task, sparqlEndpoint, limit, Some(reportUpdater)) + val entities = LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint), None, 
limit, Some(reportUpdater)) entities.headOption // Needed to actually execute the query correctTimeout mustBe true } - private def sparqlEndpointStub(selectCallback: SparqlEndpoint => Unit = _ => {}): SparqlEndpoint = { + it should "generate one query per input entity when useDefaultDataset is set and the template references entity values" in { + val query = """SELECT ?p ?o WHERE { <{{ input.entity.s }}> ?p ?o }""" + val rowsPerQuery = 2 + val task = SparqlSelectCustomTask(query, limit = rowsPerQuery.toString, useDefaultDataset = true) + + val capturedQueries = collection.mutable.ArrayBuffer.empty[String] + val sparqlEndpoint = sparqlEndpointStub(queryCapture = q => capturedQueries += q) + val stubDataset = new StubRdfDataset(sparqlEndpoint) + + val inputSchema = EntitySchema("", typedPaths = IndexedSeq(TypedPath("s", ValueType.URI))) + val inputEntities = Seq( + Entity("urn:in:1", IndexedSeq(Seq("http://example.org/a")), inputSchema), + Entity("urn:in:2", IndexedSeq(Seq("http://example.org/b")), inputSchema) + ) + val inputTable = GenericEntityTable(inputEntities, inputSchema, PlainTask("inputTask", DatasetSpec(stubDataset))) + + val activityContextMock = TestMocks.activityContextMock() + val reportUpdater = SparqlSelectExecutionReportUpdater(PlainTask("task", task), activityContextMock) + + val results = LocalSparqlSelectExecutor() + .executeOnDefaultDatasetPerEntity(task, stubDataset, inputTable, outputTask = None, executionReportUpdater = reportUpdater) + .toList + + capturedQueries.toSeq must have size 2 + capturedQueries(0) must include ("") + capturedQueries(1) must include ("") + // Bindings from both queries are flattened into the output: rowsPerQuery rows × 2 queries. 
+ results.size mustBe (rowsPerQuery * 2) + } + + it should "fail when an input entity is missing a value referenced by the template" in { + val query = """SELECT ?p ?o WHERE { <{{ input.entity.s }}> ?p ?o }""" + val task = SparqlSelectCustomTask(query, useDefaultDataset = true) + + val sparqlEndpoint = sparqlEndpointStub() + val stubDataset = new StubRdfDataset(sparqlEndpoint) + + val inputSchema = EntitySchema("", typedPaths = IndexedSeq(TypedPath("s", ValueType.URI))) + val inputEntities = Seq( + Entity("urn:in:1", IndexedSeq(Seq()), inputSchema) + ) + val inputTable = GenericEntityTable(inputEntities, inputSchema, PlainTask("inputTask", DatasetSpec(stubDataset))) + + val activityContextMock = TestMocks.activityContextMock() + val reportUpdater = SparqlSelectExecutionReportUpdater(PlainTask("task", task), activityContextMock) + + an[UnboundVariablesException] must be thrownBy { + LocalSparqlSelectExecutor() + .executeOnDefaultDatasetPerEntity(task, stubDataset, inputTable, outputTask = None, executionReportUpdater = reportUpdater) + .toList + } + } + + it should "evaluate a Jinja query template using the graph variable from the task parameters" in { + val graphUri = "http://example.org/testGraph" + val query = """SELECT * WHERE { GRAPH <{{ input.config.graph }}> { ?s ?p ?o } }""" + val task = SparqlSelectCustomTask(query) + var capturedQuery = "" + val activityContextMock = TestMocks.activityContextMock() + val reportUpdater = SparqlSelectExecutionReportUpdater(PlainTask("task", task), activityContextMock) + val sparqlEndpoint = sparqlEndpointStub(queryCapture = q => capturedQuery = q) + LocalSparqlSelectExecutor().executeOnSparqlEndpoint(task, taskWithEndpoint(sparqlEndpoint, graphUri = Some(graphUri)), None, executionReportUpdater = Some(reportUpdater)).headOption + + task.outputSchema.typedPaths.map(_.toUntypedPath.normalizedSerialization) mustBe IndexedSeq("s", "p", "o") + capturedQuery must include(s"<$graphUri>") + capturedQuery must not include 
"input.config.graph" + } + + private def taskWithEndpoint(sparqlEndpoint: SparqlEndpoint, graphUri: Option[String] = None): Task[DatasetSpec[RdfDataset]] = { + PlainTask("testDataset", DatasetSpec(new StubRdfDataset(sparqlEndpoint, graphUri))) + } + + private class StubRdfDataset(endpoint: SparqlEndpoint, graphUri: Option[String] = None) extends RdfDataset { + override def sparqlEndpoint: SparqlEndpoint = endpoint + override def parameters(implicit pluginContext: PluginContext): ParameterValues = { + graphUri match { + case Some(g) => ParameterValues.fromStringMap(Map("graph" -> g)) + case None => ParameterValues.empty + } + } + override def source(implicit userContext: UserContext): DataSource = ??? + override def linkSink(implicit userContext: UserContext): LinkSink = ??? + override def entitySink(implicit userContext: UserContext): EntitySink = ??? + } + + private def sparqlEndpointStub(selectCallback: SparqlEndpoint => Unit = _ => {}, + graphUri: Option[String] = None, + queryCapture: String => Unit = _ => {}): SparqlEndpoint = { new SparqlEndpoint { - var sparqlParamsIntern = SparqlParams() + var sparqlParamsIntern = SparqlParams(graph = graphUri) override def sparqlParams: SparqlParams = sparqlParamsIntern override def withSparqlParams(sparqlParams: SparqlParams): SparqlEndpoint = { @@ -75,6 +167,7 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec override def select(query: String, limit: Int)(implicit userContext: UserContext): SparqlResults = { selectCallback(this) + queryCapture(query) SparqlResults(Seq("s", "p", "o"), new TraversableIterator[SortedMap[String, RdfNode]] { override def foreach[U](f: SortedMap[String, RdfNode] => U): Unit = { var i = 0 @@ -87,4 +180,4 @@ class LocalSparqlSelectExecutorTest extends AnyFlatSpec } } } -} +} \ No newline at end of file diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/sparql/SparqlTemplatingTest.scala 
b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/sparql/SparqlTemplatingTest.scala deleted file mode 100644 index b222af138a..0000000000 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/sparql/SparqlTemplatingTest.scala +++ /dev/null @@ -1,57 +0,0 @@ -package org.silkframework.plugins.dataset.rdf.sparql - -import org.silkframework.plugins.dataset.rdf.tasks.templating.{Row, SparqlVelocityTemplating, TaskProperties, TemplateExecutionException} -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.must.Matchers - -class SparqlTemplatingTest extends AnyFlatSpec with Matchers { - behavior of "SPARQL Templating" - - it should "render a simple Velocity template" in { - val stringTemplate = - """SELECT * WHERE { - | $row.uri("uriProp") rdfs:label $row.plainLiteral("stringProp") - |}""".stripMargin - val template = SparqlVelocityTemplating.createTemplate(stringTemplate) - for(i <- 1 to 10) { - val rendered = SparqlVelocityTemplating.renderTemplate( - template, Row(Map("uriProp" -> s"http://entity$i", "stringProp" -> s"some label $i")), TaskProperties(Map.empty, Map.empty)) - rendered mustBe - s"""SELECT * WHERE { - | rdfs:label "some label $i" - |}""".stripMargin - } - } - - it should "render templates safely as long as safe methods are used, no injection attack possible" in { - val template = executeTemplate("""$row.plainLiteral("var")""", Map("var" -> "\"Delete everything!!!\"")) - template mustBe "\"\\\"Delete everything!!!\\\"\"" - } - - it should "fail if the value for uri() is not an URI" in { - intercept[TemplateExecutionException] { - executeTemplate("""$row.uri("uri")""", Map("uri" -> "http:// broken Uri >")) - } - } - - it should "output a nice error message when there is a syntax error" in { - - } - - it should "throw exception when a non-available method or variable is used" in { - intercept[TemplateExecutionException] { - executeTemplate("""Not existing $test""", 
Map.empty) - } - intercept[TemplateExecutionException] { - executeTemplate("""Not existing $row.notExisting("blah")""", Map("a" -> "A")) - } - intercept[TemplateExecutionException] { - executeTemplate("""Not existing $row.uri("notExists")""", Map("a" -> "A")) - } - } - - private def executeTemplate(templateString: String, bindings: Map[String, String]): String = { - val template = SparqlVelocityTemplating.createTemplate(templateString) - SparqlVelocityTemplating.renderTemplate(template, Row(bindings), TaskProperties(Map.empty, Map.empty)) - } -} diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/CrossProductIteratorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/CrossProductIteratorTest.scala similarity index 90% rename from silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/CrossProductIteratorTest.scala rename to silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/CrossProductIteratorTest.scala index e1d1579bec..39bdf7ee2b 100644 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/CrossProductIteratorTest.scala +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/CrossProductIteratorTest.scala @@ -1,43 +1,43 @@ -package org.silkframework.plugins.dataset.rdf - -import org.silkframework.plugins.dataset.rdf.executors.CrossProductIterator +package org.silkframework.plugins.dataset.rdf.tasks.templating + import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.must.Matchers - -class CrossProductIteratorTest extends AnyFlatSpec with Matchers { - behavior of "Cross Product Matcher" - - it should "calculate the cross product" in { - val it = CrossProductIterator(IndexedSeq(Seq("A", "B"), Seq("a"), Seq("1", "2", "3")), IndexedSeq("prop1", "prop2", "prop3")) - it.toSeq mustBe Seq( - 
Map("prop1" -> "A", "prop2" -> "a", "prop3" -> "1"), - Map("prop1" -> "A", "prop2" -> "a", "prop3" -> "2"), - Map("prop1" -> "A", "prop2" -> "a", "prop3" -> "3"), - Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "1"), - Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "2"), - Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "3") - ) - } - - it should "leave out assignments with zero values" in { - val it = CrossProductIterator(IndexedSeq(Seq("A", "B"), Seq(), Seq("1", "2", "3")), IndexedSeq("prop1", "prop2", "prop3")) - it.toSeq mustBe Seq( - Map("prop1" -> "A", "prop3" -> "1"), - Map("prop1" -> "A", "prop3" -> "2"), - Map("prop1" -> "A", "prop3" -> "3"), - Map("prop1" -> "B", "prop3" -> "1"), - Map("prop1" -> "B", "prop3" -> "2"), - Map("prop1" -> "B", "prop3" -> "3") - ) - val it2 = CrossProductIterator(IndexedSeq(Seq(), Seq("a", "b"), Seq()), IndexedSeq("prop1", "prop2", "prop3")) - it2.toSeq mustBe Seq( - Map("prop2" -> "a"), - Map("prop2" -> "b") - ) - } - - it should "produce one empty assignment when all inputs are empty" in { - val it = CrossProductIterator(IndexedSeq(Seq(), Seq(), Seq()), IndexedSeq("prop1", "prop2", "prop3")) - it.toSeq mustBe Seq(Map()) - } -} +import org.scalatest.matchers.must.Matchers +import org.silkframework.plugins.dataset.rdf.tasks.templating.SparqlLegacyTemplate.CrossProductIterator + +class CrossProductIteratorTest extends AnyFlatSpec with Matchers { + behavior of "Cross Product Matcher" + + it should "calculate the cross product" in { + val it = CrossProductIterator(IndexedSeq(Seq("A", "B"), Seq("a"), Seq("1", "2", "3")), IndexedSeq("prop1", "prop2", "prop3")) + it.toSeq mustBe Seq( + Map("prop1" -> "A", "prop2" -> "a", "prop3" -> "1"), + Map("prop1" -> "A", "prop2" -> "a", "prop3" -> "2"), + Map("prop1" -> "A", "prop2" -> "a", "prop3" -> "3"), + Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "1"), + Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "2"), + Map("prop1" -> "B", "prop2" -> "a", "prop3" -> "3") + ) + } + + it should 
"leave out assignments with zero values" in { + val it = CrossProductIterator(IndexedSeq(Seq("A", "B"), Seq(), Seq("1", "2", "3")), IndexedSeq("prop1", "prop2", "prop3")) + it.toSeq mustBe Seq( + Map("prop1" -> "A", "prop3" -> "1"), + Map("prop1" -> "A", "prop3" -> "2"), + Map("prop1" -> "A", "prop3" -> "3"), + Map("prop1" -> "B", "prop3" -> "1"), + Map("prop1" -> "B", "prop3" -> "2"), + Map("prop1" -> "B", "prop3" -> "3") + ) + val it2 = CrossProductIterator(IndexedSeq(Seq(), Seq("a", "b"), Seq()), IndexedSeq("prop1", "prop2", "prop3")) + it2.toSeq mustBe Seq( + Map("prop2" -> "a"), + Map("prop2" -> "b") + ) + } + + it should "produce one empty assignment when all inputs are empty" in { + val it = CrossProductIterator(IndexedSeq(Seq(), Seq(), Seq()), IndexedSeq("prop1", "prop2", "prop3")) + it.toSeq mustBe Seq(Map()) + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala new file mode 100644 index 0000000000..ab84b1f950 --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlJinjaTemplateTest.scala @@ -0,0 +1,206 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.must.Matchers +import org.silkframework.entity.paths.UntypedPath +import org.silkframework.entity.{Entity, EntitySchema} +import org.silkframework.plugins.templating.jinja.JinjaTemplateEngine +import org.silkframework.runtime.templating.TemplateVariableValue +import org.silkframework.runtime.templating.exceptions.{TemplateEvaluationException, UnboundVariablesException} +import org.silkframework.runtime.validation.ValidationException + +class SparqlJinjaTemplateTest extends AnyFlatSpec with Matchers { + + behavior of "SPARQL 
templating with the Jinja Template Engine" + + it should "render values from the current input entity via input.entity" in { + val result = generate( + """INSERT DATA { <{{ input.entity.subject }}> "value" } ;""", + assignments = Map("subject" -> "urn:entity:1") + ) + result must include("") + } + + it should "render parameters of the connected input task via input.config" in { + val result = generate( + """SELECT * WHERE { GRAPH <{{ input.config.graph }}> { ?s ?p ?o } }""", + taskProps = TaskProperties(Map("graph" -> "urn:graph:1"), Map.empty) + ) + result must include("") + } + + it should "render parameters of the connected output task via output.config" in { + val result = generate( + """INSERT DATA { GRAPH <{{ output.config.graph }}> { } } ;""", + taskProps = TaskProperties(Map.empty, Map("graph" -> "urn:graph:out")) + ) + result must include("") + } + + it should "render project and global template variables" in { + val project = new TemplateVariableValue("myVar", Seq("project"), Seq("projectValue")) + val global = new TemplateVariableValue("myVar", Seq("global"), Seq("globalValue")) + val result = generate( + """{{ project.myVar }} / {{ global.myVar }}""", + templateVariables = Seq(project, global) + ) + result must include("projectValue / globalValue") + } + + it should "reject old syntax (row, inputProperties, outputProperties)" in { + intercept[UnboundVariablesException] { + generate("""{{ row.uri("x") }}""", assignments = Map("x" -> "urn:a:b")) + } + intercept[UnboundVariablesException] { + generate("""{{ inputProperties.uri("graph") }}""", + taskProps = TaskProperties(Map("graph" -> "urn:g:1"), Map.empty)) + } + } + + it should "fail if a referenced variable is not provided" in { + intercept[UnboundVariablesException] { + generate("""{{ project.missing }}""") + } + intercept[UnboundVariablesException] { + generate("""{{ global.missing }}""") + } + intercept[UnboundVariablesException] { + SparqlTemplate.create(JinjaTemplateEngine.id, """{{ 
input.entity.subject }}""") + .generate(None, TaskProperties(Map.empty, Map.empty)).head + } + intercept[UnboundVariablesException] { + generate("""{{ input.entity.existing }} {{ input.entity.missing }}""", assignments = Map("existing" -> "urn:x:1")) + } + } + + it should "derive the input schema from input.entity.* references" in { + val template = SparqlTemplate.create(JinjaTemplateEngine.id, + """ + |INSERT DATA { + | <{{ input.entity.subject }}> "{{ input.entity.label }}" . + | <{{ input.entity.subject }}> "value" . + |} ; + |""".stripMargin) + val paths = template.inputSchema.typedPaths.flatMap(_.property).map(_.propertyUri.uri) + paths.toSet mustBe Set("subject", "label") + } + + it should "report a Jinja template with no entity references as static" in { + val staticTemplate = SparqlTemplate.create(JinjaTemplateEngine.id, + """INSERT DATA { <{{ input.config.target }}> "v" } ;""") + staticTemplate.isStaticTemplate mustBe true + val dynamicTemplate = SparqlTemplate.create(JinjaTemplateEngine.id, + """INSERT DATA { <{{ input.entity.subject }}> "v" } ;""") + dynamicTemplate.isStaticTemplate mustBe false + } + + it should "render a realistic SPARQL Update template combining all variable scopes, filters and a conditional" in { + val templateString = + """PREFIX rdfs: + |WITH <{{ output.config.graph | validate_uri }}> + |DELETE { <{{ input.entity.subject | validate_uri }}> ?p ?o } + |INSERT { + | <{{ input.entity.subject | validate_uri }}> rdfs:label "{{ input.entity.label | escape_literal }}" . + | <{{ input.entity.subject | validate_uri }}> <{{ project.labelProp | validate_uri }}> "{{ global.author | escape_literal }}" . + | {% if input.entity.comment %} + | <{{ input.entity.subject | validate_uri }}> rdfs:comment '''{{ input.entity.comment | escape_multiline_literal }}''' . 
+ | {% endif %} + |} + |WHERE { <{{ input.entity.subject | validate_uri }}> ?p ?o } ; + |""".stripMargin + val template = SparqlTemplate.create(JinjaTemplateEngine.id, templateString) + + template.inputSchema.typedPaths.flatMap(_.property).map(_.propertyUri.uri).toSet mustBe + Set("subject", "label", "comment") + template.isStaticTemplate mustBe false + + val taskProps = TaskProperties(inputTask = Map.empty, outputTask = Map("graph" -> "urn:graph:out")) + val projectAndGlobal = Seq( + new TemplateVariableValue("labelProp", Seq("project"), Seq("urn:prop:label")), + new TemplateVariableValue("author", Seq("global"), Seq("Jane")) + ) + + val rendered = template.generate( + entity = Some(entityFromMap(Map( + "subject" -> "urn:entity:1", + "label" -> """O'Reilly & "friends"""", + "comment" -> "has ''' triple quotes" + ))), + taskProperties = taskProps, + templateVariables = projectAndGlobal + ).head + + rendered must include("WITH ") + rendered must include("") + rendered must include(""""O\'Reilly & \"friends\""""") + rendered must include("") + rendered must include(""""Jane"""") + rendered must include("""has \'\'\' triple quotes""") + + // With an empty `comment`, the {% if %} branch is skipped. + val withoutComment = template.generate( + entity = Some(entityFromMap(Map("subject" -> "urn:entity:1", "label" -> "plain", "comment" -> ""))), + taskProperties = taskProps, + templateVariables = projectAndGlobal + ).head + withoutComment must not include "rdfs:comment" + + // An invalid IRI piped through validate_uri surfaces as a TemplateEvaluationException. 
+ intercept[TemplateEvaluationException] { + template.generate( + entity = Some(entityFromMap(Map("subject" -> "not a uri", "label" -> "plain", "comment" -> ""))), + taskProperties = taskProps, + templateVariables = projectAndGlobal + ).head + } + } + + it should "alias input.entity variables to bare references when defaultScope = input.entity" in { + val template = SparqlTemplate.create(JinjaTemplateEngine.id, + """<{{ subject }}> "{{ input.entity.label }}"""", + defaultScope = Seq("input", "entity")) + template.inputSchema.typedPaths.flatMap(_.property).map(_.propertyUri.uri).toSet mustBe Set("subject", "label") + val rendered = template.generate( + entity = Some(entityFromMap(Map("subject" -> "urn:entity:1", "label" -> "hello"))), + taskProperties = TaskProperties(Map.empty, Map.empty) + ).head + rendered mustBe """ "hello"""" + } + + it should "alias variables from an arbitrary scope without polluting the input entity schema" in { + val template = SparqlTemplate.create(JinjaTemplateEngine.id, + """SELECT * WHERE { GRAPH <{{ graph }}> { ?s ?p ?o } }""", + defaultScope = Seq("input", "config")) + val rendered = template.generate( + entity = None, + taskProperties = TaskProperties(Map("graph" -> "urn:graph:1"), Map.empty) + ).head + rendered must include("") + template.inputSchema.typedPaths mustBe empty + } + + it should "expose multi-valued entity properties as lists iterable in the Jinja template" in { + val template = SparqlTemplate.create(JinjaTemplateEngine.id, + """{% for s in input.entity.subject %}INSERT DATA { <{{ s }}> "x" } ; + |{% endfor %}""".stripMargin) + val schema = EntitySchema("", IndexedSeq(UntypedPath("subject").asUntypedValueType)) + val entity = Entity("urn:e:1", IndexedSeq(Seq("urn:a:1", "urn:a:2")), schema) + val rendered = template.generate(Some(entity), TaskProperties(Map.empty, Map.empty)).head + rendered must include("") + rendered must include("") + } + + private def generate(template: String, + assignments: Map[String, String] = 
Map.empty, + taskProps: TaskProperties = TaskProperties(Map.empty, Map.empty), + templateVariables: Seq[TemplateVariableValue] = Seq.empty): String = { + val entity = if (assignments.isEmpty) None else Some(entityFromMap(assignments)) + SparqlTemplate.create(JinjaTemplateEngine.id, template).generate(entity, taskProps, templateVariables).head + } + + private def entityFromMap(values: Map[String, String]): Entity = { + val entries = values.toIndexedSeq + val schema = EntitySchema("", entries.map { case (k, _) => UntypedPath(k).asUntypedValueType }) + Entity("urn:test", entries.map { case (_, v) => Seq(v) }, schema) + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractorTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractorTest.scala new file mode 100644 index 0000000000..35e02ef666 --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlSelectVarExtractorTest.scala @@ -0,0 +1,83 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.must.Matchers + +class SparqlSelectVarExtractorTest extends AnyFlatSpec with Matchers { + + behavior of "SparqlSelectVarExtractor" + + it should "extract plain projected variables" in { + extract("SELECT ?a ?b WHERE { ?a ?p ?b }") mustBe Seq("a", "b") + } + + it should "strip DISTINCT" in { + extract("SELECT DISTINCT ?x ?y WHERE { ?x ?p ?y }") mustBe Seq("x", "y") + } + + it should "strip REDUCED" in { + extract("SELECT REDUCED ?x ?y WHERE { ?x ?p ?y }") mustBe Seq("x", "y") + } + + it should "return the alias of a single-expression projection" in { + extract("SELECT (?x + 1 AS ?sum) WHERE { ?x ?p ?o }") mustBe Seq("sum") + } + + it should "return only the outer AS alias for nested function calls" in { + 
extract("SELECT (COUNT(?x) AS ?n) WHERE { ?x ?p ?o }") mustBe Seq("n") + } + + it should "mix plain variables and AS aliases" in { + extract("SELECT ?a (?x + 1 AS ?sum) ?b WHERE { ?a ?p ?b }") mustBe Seq("a", "sum", "b") + } + + it should "fall back to all variables for SELECT *" in { + extract("SELECT * WHERE { ?s ?p ?o }") mustBe Seq("s", "p", "o") + } + + it should "fall back to all variables for SELECT * with a GRAPH clause" in { + extract("SELECT * WHERE { GRAPH { ?s ?p ?o } }") mustBe Seq("s", "p", "o") + } + + it should "be case-insensitive on SELECT / WHERE / DISTINCT / AS" in { + extract("select distinct ?a (?x + 1 as ?sum) where { ?a ?p ?o }") mustBe Seq("a", "sum") + } + + it should "tolerate a Jinja placeholder inside a string literal" in { + extract("""SELECT ?s WHERE { ?s rdfs:label "{{ input.entity.name }}" }""") mustBe Seq("s") + } + + it should "tolerate a Jinja placeholder in a numeric position" in { + extract("SELECT ?s WHERE { ?s ?p ?o } LIMIT {{ input.config.max }}") mustBe Seq("s") + } + + it should "tolerate a Jinja placeholder as a URI fragment" in { + extract("SELECT ?s WHERE { ?s a <{{ input.config.type }}> }") mustBe Seq("s") + } + + it should "accept projections terminated by a brace without a WHERE keyword" in { + extract("SELECT ?a ?b { ?a ?p ?b }") mustBe Seq("a", "b") + } + + it should "deduplicate while preserving first-appearance order" in { + extract("SELECT ?a ?b ?a WHERE { ?a ?p ?b }") mustBe Seq("a", "b") + } + + it should "return the outer projection, not inner sub-query variables" in { + extract("SELECT ?a WHERE { SELECT ?x ?y WHERE { ?x ?p ?y } }") mustBe Seq("a") + } + + it should "stop at FROM named graph clauses" in { + extract("SELECT ?a ?b FROM WHERE { ?a ?p ?b }") mustBe Seq("a", "b") + } + + it should "return an empty sequence for non-SELECT queries" in { + extract("INSERT DATA { }") mustBe empty + } + + it should "return an empty sequence for an ASK query" in { + extract("ASK WHERE { ?s ?p ?o }") mustBe empty + } + + 
private def extract(query: String): Seq[String] = SparqlSelectVarExtractor.extractSelectVars(query) +} diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala new file mode 100644 index 0000000000..a88dbc0c2b --- /dev/null +++ b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplateVelocityTest.scala @@ -0,0 +1,132 @@ +package org.silkframework.plugins.dataset.rdf.tasks.templating + +import org.apache.jena.vocabulary.XSD +import org.silkframework.entity.paths.UntypedPath +import org.silkframework.entity.{Entity, EntitySchema} +import org.silkframework.plugins.templating.velocity.VelocityTemplateEngine +import org.silkframework.runtime.validation.ValidationException +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.must.Matchers +import org.silkframework.runtime.templating.{InMemoryTemplateVariablesReader, TemplateVariables} +import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException + +class SparqlTemplateVelocityTest extends AnyFlatSpec with Matchers { + + behavior of "SPARQL templating with the Velocity Template Engine" + + private val sparqlUpdateTemplate = + s"""PREFIX rdf: + |PREFIX xsd: <${XSD.getURI}> + |DELETE DATA { $$row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $$row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") } ; + |INSERT DATA { $$row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $$row.plainLiteral("PROP_FROM_ENTITY_SCHEMA3")^^xsd:int } ;""".stripMargin + + it should "output the correct input paths of the template" in { + val templateString = + """ + |$row.uri("subject") + |#if ( $row.exists("somePath") ) + | Plain: $row.plainLiteral("somePath") + | Raw: $row.rawUnsafe("trustedValuePath") + |#end + |""".stripMargin + val compiled = 
VelocityTemplateEngine().compile(templateString) + compiled.variables.get.map(_.name).sorted mustBe Seq("row") + } + + it should "validate without problems for valid templates" in { + val templateWithLogic = s"""PREFIX xsd: <${XSD.getURI}> + |INSERT DATA { + | "entity 1" . + | #if ($$row.exists("input1")) + | $$row.uri("input1") $$row.plainLiteral("input2")^^xsd:string + | #end + |}; + |""".stripMargin + + validate(sparqlUpdateTemplate) + VelocityTemplateEngine().compile(templateWithLogic).variables.get.map(_.name).sorted mustBe Seq("row") + validate(templateWithLogic) + } + + it should "always validate templates as correct if rawUnsafe() is used, because there is no way to generate meaningful examples to validate" in { + validate("""Completely broken SPARQL Update query with $row.rawUnsafe("something")""") + } + + it should "raise a validation error when the template is invalid" in { + intercept[ValidationException] { + validate("""DELETE DATA { $row.uri("test") rdf:label } ;""") + } + intercept[ValidationException] { + validate("""DELETE DATA { rdf:label $row.uri(3) ;""") + } + intercept[ValidationException] { + // No rdf prefix defined + validate("""DELETE DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") } ; + | INSERT DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA3") } ;""".stripMargin) + } + intercept[ValidationException] { + validate("""PREFIX foaf: + | + |WITH + |DELETE { ?person ?property ?value } + |WHERE { ?person ?property ?value ; foaf:givenName 'Fred } ;""".stripMargin) // Missing closing ' for literal + } + validate(sparqlUpdateTemplate.dropRight(1), batchSize = 1) // Dropped ';' at the end, not batch supported, but batch size is 1 + intercept[ValidationException] { + validate(sparqlUpdateTemplate.dropRight(1)) // Dropped ';' at the end, not batch supported + } + } + + it should "render a simple Velocity template" in { + val stringTemplate = + 
"""SELECT * WHERE { + | $row.uri("uriProp") rdfs:label $row.plainLiteral("stringProp") + |}""".stripMargin + val template = new SparqlLegacyTemplate(VelocityTemplateEngine().compile(stringTemplate)) + for(i <- 1 to 10) { + val rendered = template.generate(Some(entityFromMap(Map("uriProp" -> s"http://entity$i", "stringProp" -> s"some label $i"))), TaskProperties(Map.empty, Map.empty)).head + rendered mustBe + s"""SELECT * WHERE { + | rdfs:label "some label $i" + |}""".stripMargin + } + } + + it should "render templates safely as long as safe methods are used, no injection attack possible" in { + val template = generate("""$row.plainLiteral("var")""", Map("var" -> "\"Delete everything!!!\"")) + template mustBe "\"\\\"Delete everything!!!\\\"\"" + } + + it should "fail if the value for uri() is not an URI" in { + intercept[TemplateEvaluationException] { + generate("""$row.uri("uri")""", Map("uri" -> "http:// broken Uri >")) + } + } + + it should "throw exception when a non-available method or variable is used" in { + intercept[TemplateEvaluationException] { + generate("""Not existing $test""", Map.empty) + } + intercept[TemplateEvaluationException] { + generate("""Not existing $row.notExisting("blah")""", Map("a" -> "A")) + } + intercept[TemplateEvaluationException] { + generate("""Not existing $row.uri("notExists")""", Map("a" -> "A")) + } + } + + private def generate(templateString: String, bindings: Map[String, String]): String = { + val entity = if (bindings.isEmpty) None else Some(entityFromMap(bindings)) + new SparqlLegacyTemplate(VelocityTemplateEngine().compile(templateString)).generate(entity, TaskProperties(Map.empty, Map.empty)).head + } + + private def entityFromMap(values: Map[String, String]): Entity = { + val entries = values.toIndexedSeq + val schema = EntitySchema("", entries.map { case (k, _) => UntypedPath(k).asUntypedValueType }) + Entity("urn:test", entries.map { case (_, v) => Seq(v) }, schema) + } + + def validate(template: String, batchSize: 
Int = 2): Unit = { + new SparqlLegacyTemplate(VelocityTemplateEngine().compile(template)).validate(InMemoryTemplateVariablesReader(TemplateVariables.empty, Set.empty), Some(batchSize)) + } +} diff --git a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala b/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala deleted file mode 100644 index 2ebc58ba0a..0000000000 --- a/silk-plugins/silk-plugins-rdf/src/test/scala/org/silkframework/plugins/dataset/rdf/tasks/templating/SparqlTemplatingEngineVelocityTest.scala +++ /dev/null @@ -1,77 +0,0 @@ -package org.silkframework.plugins.dataset.rdf.tasks.templating - -import org.apache.jena.vocabulary.XSD -import org.silkframework.runtime.validation.ValidationException -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.must.Matchers - -class SparqlTemplatingEngineVelocityTest extends AnyFlatSpec with Matchers { - behavior of "Velocity SPARQL Templating Engine" - - private val sparqlUpdateTemplate = - s"""PREFIX rdf: - |PREFIX xsd: <${XSD.getURI}> - |DELETE DATA { $$row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $$row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") } ; - |INSERT DATA { $$row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $$row.plainLiteral("PROP_FROM_ENTITY_SCHEMA3")^^xsd:int } ;""".stripMargin - - it should "output the correct input paths of the template" in { - val templateString = - """ - |$row.uri("subject") - |#if ( $row.exists("somePath") ) - | Plain: $row.plainLiteral("somePath") - | Raw: $row.rawUnsafe("trustedValuePath") - |#end - |""".stripMargin - val engine = SparqlTemplatingEngineVelocity(templateString, 1) - engine.inputPaths().sorted mustBe Seq("somePath", "subject", "trustedValuePath") - } - - private val templateWithLogic = s"""PREFIX xsd: <${XSD.getURI}> - |INSERT DATA { - | "entity 1" . 
- | #if ($$row.exists("input1")) - | $$row.uri("input1") $$row.plainLiteral("input2")^^xsd:string - | #end - |}; - |""".stripMargin - - it should "validate without problems for valid templates" in { - validate(sparqlUpdateTemplate) - SparqlTemplatingEngineVelocity(templateWithLogic, 1).inputPaths().sorted mustBe Seq("input1", "input2") - validate(templateWithLogic) - } - - it should "always validate templates as correct if rawUnsafe() is used, because there is no way to generate meaningful examples to validate" in { - validate("""Completely broken SPARQL Update query with $row.rawUnsafe("something")""") - } - - it should "raise a validation error when the template is invalid" in { - intercept[ValidationException] { - validate("""DELETE DATA { $row.uri("test") rdf:label } ;""") - } - intercept[ValidationException] { - validate("""DELETE DATA { rdf:label $row.uri(3) ;""") - } - intercept[ValidationException] { - // No rdf prefix defined - validate("""DELETE DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA2") } ; - | INSERT DATA { $row.uri("PROP_FROM_ENTITY_SCHEMA1") rdf:label $row.plainLiteral("PROP_FROM_ENTITY_SCHEMA3") } ;""".stripMargin) - } - intercept[ValidationException] { - validate("""PREFIX foaf: - | - |WITH - |DELETE { ?person ?property ?value } - |WHERE { ?person ?property ?value ; foaf:givenName 'Fred } ;""".stripMargin) // Missing closing ' for literal - } - validate(sparqlUpdateTemplate.dropRight(1), batchSize = 1) // Dropped ';' at the end, not batch supported, but batch size is 1 - intercept[ValidationException] { - validate(sparqlUpdateTemplate.dropRight(1)) // Dropped ';' at the end, not batch supported - } - } - - def validate(template: String, batchSize: Int = 2): Unit = { - SparqlTemplatingEngineVelocity(template, batchSize).validate() - } -} diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule 
b/silk-plugins/silk-plugins-templating-jinja/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule new file mode 100644 index 0000000000..4d8fd8f332 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule @@ -0,0 +1 @@ +org.silkframework.plugins.templating.jinja.JinjaTemplatingPlugins diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollector.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollector.scala new file mode 100644 index 0000000000..6ed97e45f9 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollector.scala @@ -0,0 +1,45 @@ +package org.silkframework.plugins.templating.jinja + +import com.hubspot.jinjava.tree.{ExpressionNode, Node, TagNode} +import com.hubspot.jinjava.tree.parse.ExpressionToken +import org.silkframework.runtime.templating.TemplateMethodUsage + +import scala.jdk.CollectionConverters.IterableHasAsScala +import scala.util.matching.Regex + +/** + * Collects all method usages on a given variable in a Jinja template. + * Only methods with a single string constant parameter are returned. + */ +class JinjaMethodCollector { + + /** + * Collects all usages of methods called on the given variable name in the template node. 
+ */ + def collect(node: Node, variableName: String): Seq[TemplateMethodUsage] = { + node match { + case tagNode: TagNode => + val fromHelpers = extractMethodUsages(tagNode.getHelpers, variableName) + val fromChildren = tagNode.getChildren.asScala.flatMap(collect(_, variableName)).toSeq + fromHelpers ++ fromChildren + case exprNode: ExpressionNode => + val expr = exprNode.getMaster.asInstanceOf[ExpressionToken].getExpr + extractMethodUsages(expr, variableName) + case _ => + node.getChildren.asScala.flatMap(collect(_, variableName)).toSeq + } + } + + private def extractMethodUsages(expression: String, varName: String): Seq[TemplateMethodUsage] = { + JinjaMethodCollector.methodCallPattern(varName).findAllMatchIn(expression).map { m => + TemplateMethodUsage(m.group(1), m.group(2)) + }.toSeq + } +} + +object JinjaMethodCollector { + + // Matches: varName.methodName("param") or varName.methodName('param') + private def methodCallPattern(varName: String): Regex = + s"""${Regex.quote(varName)}\\.([a-zA-Z_][a-zA-Z0-9_]*)\\(["']([^"']*)["']\\)""".r +} \ No newline at end of file diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala new file mode 100644 index 0000000000..695c638c24 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplateEngine.scala @@ -0,0 +1,161 @@ +package org.silkframework.plugins.templating.jinja + +import com.hubspot.jinjava.interpret.{InterpretException, JinjavaInterpreter, UnknownTokenException} +import com.hubspot.jinjava.tree.Node +import com.hubspot.jinjava.{Jinjava, JinjavaConfig} +import org.silkframework.runtime.plugin.annotations.Plugin +import org.silkframework.runtime.templating.exceptions.{TemplateEvaluationException, UnboundVariablesException} +import 
org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateMethodUsage, TemplateVariableName, TemplateVariableValue} + +import java.io.Writer +import java.util.EmptyStackException +import scala.jdk.CollectionConverters.ListHasAsScala +import scala.util.control.Breaks.{break, breakable} + +@Plugin( + id = JinjaTemplateEngine.id, + label = "Jinja" +) +case class JinjaTemplateEngine() extends TemplateEngine { + + override def compile(templateString: String): JinjaTemplate = { + new JinjaTemplate(JinjaTemplateEngine.interpreter().parse(templateString)) + } +} + +object JinjaTemplateEngine { + + final val id = "jinja" + + private val interpreters = new ThreadLocal[JinjavaInterpreter] { + override protected def initialValue(): JinjavaInterpreter = { + withPluginClassLoader { + val config = JinjavaConfig.newBuilder.withFailOnUnknownTokens(true).build() + val jinja = new Jinjava(config) + TransformFilters.register(jinja.getGlobalContext) + val interpreter = jinja.newInterpreter() + JinjavaInterpreter.pushCurrent(interpreter) // Macros will request the current interpreter (thread-local) + interpreter + } + } + } + + /** + * Runs `body` with the thread context class loader set to this plugin's class loader. + * Jinjava loads its shaded EL `ExpressionFactory` via the context class loader (ServiceLoader), + * so both parsing and evaluation must run under a class loader that can see the plugin jar. + * See https://github.com/HubSpot/jinjava/issues/317. + */ + def withPluginClassLoader[T](body: => T): T = { + val curClassLoader = Thread.currentThread.getContextClassLoader + try { + Thread.currentThread.setContextClassLoader(this.getClass.getClassLoader) + body + } finally { + Thread.currentThread.setContextClassLoader(curClassLoader) + } + } + + /** + * Retrieves an interpreter instance. + */ + def interpreter(): JinjavaInterpreter = { + val inter = interpreters.get() + // We need to reset a number of properties. 
+ // It would be better to change this to create a fresh instance on every call. But then we need to check carefully for memory leaks. + inter.getContext.reset() + inter.getContext.clear() + breakable { + while(true) { + try { + inter.getContext.popRenderStack() + } catch { + case _: EmptyStackException => + break() + } + } + } + do { + inter.removeLastError() + } while(!inter.getLastError.isEmpty) + inter + } + +} + +class JinjaTemplate(val node: Node) extends CompiledTemplate { + + override val variables: Option[Seq[TemplateVariableName]] = { + val result = new JinjaVariableCollector().collect(node) + Some(result.unboundVars) + } + + override def methodUsages(variableName: String): Seq[TemplateMethodUsage] = { + new JinjaMethodCollector().collect(node, variableName) + } + + override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig = EvaluationConfig()): Unit = { + // Check if values for all variables are provided + // We do this explicitly because the Jinja-internal checks are not sufficient + // (The implementation ignores expressions with filters and only returns the first missing var) + var missingVars: Seq[TemplateVariableName] = Seq.empty + for (vars <- variables) { + // Collect all scoped variables of the form 'scope.name' + val names = values.map(_.asName) + // Variables of the form 'scope.name' can also be addressed by any scope prefix (e.g., 'input' or 'input.parameters') + val scopes = values.filter(_.scope.nonEmpty).flatMap { v => + (1 to v.scope.length).map(n => new TemplateVariableName(v.scope.take(n).mkString("."), Seq.empty)) + } + // Find missing vars + val existingVars = (names ++ scopes).toSet + missingVars = vars.filterNot(existingVars.contains) + } + if (missingVars.nonEmpty) { + if(evaluationConfig.ignoreUnboundVariables) { + // Leave unbound variables as they are in the result. 
+ val extendedValues = values ++ missingVars.map(mv => new TemplateVariableValue(mv.name, mv.scope, Seq(mv.scopedName))) + evaluate(convertValues(extendedValues), writer) + } else { + throw new UnboundVariablesException(missingVars) + } + } else { + evaluate(convertValues(values), writer) + } + } + + override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { + // Render the template + val interpreter = JinjaTemplateEngine.interpreter() + for ((key, value) <- values) { + interpreter.getContext.put(key, value) + } + try { + val rendered = JinjaTemplateEngine.withPluginClassLoader(interpreter.render(node, false)) + writer.write(rendered) + } catch { + case ex: UnknownTokenException => + throw new UnboundVariablesException(Seq(TemplateVariableName.parse(ex.getToken)), Some(ex)) + case ex: InterpretException => + throw new TemplateEvaluationException(ex.getMessage, Some(ex)) + } + + // For now, we just throw any errors. In the future, we could improve this and add an error collector. 
+ if (!interpreter.getErrors.isEmpty) { + val messages = interpreter.getErrors.asScala.map { error => + Option(error.getException).map(rootCauseMessage).getOrElse(error.getMessage) + } + val prefix = if (messages.size == 1) "Error in template: " else "Errors in template: " + val msg = prefix + messages.mkString(" ") + val cause = Option(interpreter.getErrors.get(0).getException) + throw new TemplateEvaluationException(msg, cause) + } + } + + private def rootCauseMessage(throwable: Throwable): String = { + Option(throwable.getCause) match { + case Some(cause) if cause ne throwable => rootCauseMessage(cause) + case _ => throwable.getMessage + } + } + +} diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplatingPlugins.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplatingPlugins.scala new file mode 100644 index 0000000000..b7b6091c13 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaTemplatingPlugins.scala @@ -0,0 +1,7 @@ +package org.silkframework.plugins.templating.jinja + +import org.silkframework.runtime.plugin.{AnyPlugin, PluginModule} + +class JinjaTemplatingPlugins extends PluginModule { + override def pluginClasses: Seq[Class[_ <: AnyPlugin]] = Seq(classOf[JinjaTemplateEngine]) +} diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala new file mode 100644 index 0000000000..2aea83093e --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollector.scala @@ -0,0 +1,193 @@ +package org.silkframework.plugins.templating.jinja + +import 
com.hubspot.jinjava.el.ExtendedSyntaxBuilder +import com.hubspot.jinjava.lib.tag._ +import com.hubspot.jinjava.tree.parse.ExpressionToken +import com.hubspot.jinjava.tree.{ExpressionNode, Node, TagNode} +import com.hubspot.jinjava.util.HelperStringTokenizer +import jinjava.de.odysseus.el.tree.TreeBuilderException +import jinjava.de.odysseus.el.tree.impl.ast.{AstDot, AstEval} +import org.silkframework.runtime.templating.TemplateVariableName + +import scala.collection.immutable.ArraySeq +import scala.jdk.CollectionConverters.{IterableHasAsScala, ListHasAsScala} + +/** + * Collects all referenced variables in a Jinja template. + */ +class JinjaVariableCollector { + + private val EXPRESSION_START_TOKEN = "#{" + private val EXPRESSION_END_TOKEN = "}" + + private val builder = new ExtendedSyntaxBuilder + + /** + * Collects all variable names from a Jinja template node. + */ + def collect(node: Node, scope: Scope = Scope.empty): Scope = { + node match { + case tagNode: TagNode => + collectFromTag(tagNode, scope) + case exprNode: ExpressionNode => + scope ++ collectFromExpression(exprNode.getMaster.asInstanceOf[ExpressionToken].getExpr) + case _ => + collectFromChildren(node, scope) + } + } + + /** + * Collects all variable names from a Jinja template tag. + * Needs to copy code from the individual tags to replicate behaviour. 
+ */
+  private def collectFromTag(tagNode: TagNode, scope: Scope): Scope = {
+    tagNode.getTag match {
+      case _: IfTag | _: ElseIfTag | _: DoTag =>
+        scope ++ collectFromExpression(tagNode.getHelpers) ++ collectFromChildren(tagNode, scope)
+      case _: ForTag =>
+        // Parses expressions of the form "loopVars in loopedVars"
+        val parts = tagNode.getHelpers.split("\\s+in\\s+")
+        if (parts.length == 2) {
+          val loopVars = new HelperStringTokenizer(parts(0)).splitComma(true).allTokens
+          val loopedVars = collectFromExpression(parts(1))
+          val childVars = collectFromChildren(tagNode, scope.withBoundNames(loopVars.asScala.toSeq))
+          val filteredChildVars = childVars.unboundVars.filterNot(v => v.scope == Seq("loop") || v.name == "loop" )
+          loopedVars.withUnbound(filteredChildVars)
+        } else {
+          collectFromChildren(tagNode, scope)
+        }
+      case _: SetTag =>
+        val expression = tagNode.getHelpers
+        val eqPos = expression.indexOf('=')
+        if(eqPos != -1) {
+          val leftVars = ArraySeq.unsafeWrapArray(expression.substring(0, eqPos).trim.split("\\s*,\\s*"))
+          val rightVars = collectFromExpression("[" + expression.substring(eqPos + 1) + "]")
+          scope.withBoundNames(leftVars) ++ rightVars
+        } else {
+          scope
+        }
+      case _: MacroTag =>
+        // Add all parameters as bound variables to the scope
+        val functionScope = scope.withBound(collectFromExpression(tagNode.getHelpers).unboundVars)
+        // Collect any unbound variables within the macro
+        collectFromChildren(tagNode, functionScope)
+      case _ =>
+        collectFromChildren(tagNode, scope)
+    }
+  }
+
+  private def collectFromChildren(node: Node, scope: Scope): Scope = {
+    var curScope = scope
+    for(child <- node.getChildren.asScala) {
+      val newScope = collect(child, curScope)
+      curScope = newScope
+    }
+    // Any newly bound variable is not valid outside of this node's children
+    curScope.copy(boundVars = scope.boundVars)
+  }
+
+  /**
+   * Parses an expression from a Jinja template and collects all variable names. 
+ * Expressions are used in tags, such as in if and for expressions. + */ + private def collectFromExpression(expression: String): Scope = { + try { + val tree = builder.build(EXPRESSION_START_TOKEN + expression + EXPRESSION_END_TOKEN) + // Manually treat simple expressions of the form `project.variable` or `variable.method(...)` + expression match { + case JinjaVariableCollector.scopedName(scopePart, name) => + val scope = scopePart.dropRight(1).split('.').toSeq + Scope( + unboundVars = Seq(new TemplateVariableName(name, scope)) + ) + case JinjaVariableCollector.methodCallOnVar(varName) => + Scope( + unboundVars = Seq(new TemplateVariableName(varName, Seq.empty)) + ) + case _ => + // Try to find scoped variable references (e.g. `scope.name`) within complex expressions + val scopedVars = JinjaVariableCollector.scopedName.findAllMatchIn(expression).map { m => + val scopePart = m.group(1).dropRight(1) + val name = m.group(2) + new TemplateVariableName(name, scopePart.split('.').toSeq) + }.toSeq + // Collect plain (unscoped) identifiers, excluding roots of scoped vars (e.g. 
`loop` from `loop.index`) + val scopedRoots = scopedVars.flatMap(_.scope.headOption).toSet + val plainVars = tree.getIdentifierNodes.asScala + .map(_.getName) + .filterNot(ignoreIdentifierNode) + .filterNot(scopedRoots) + .toSeq + .map(new TemplateVariableName(_, Seq.empty)) + Scope(unboundVars = (scopedVars ++ plainVars).distinct) + } + } catch { + case _: TreeBuilderException => + // Fallback: try to extract the leading variable from method call expressions like `var.method(...)` + expression match { + case JinjaVariableCollector.methodCallOnVar(varName) => + Scope(unboundVars = Seq(new TemplateVariableName(varName, Seq.empty))) + case _ => + Scope.empty + } + } + } + + private def ignoreIdentifierNode(name: String): Boolean = { + name.startsWith("___") || // internal identifier + name.startsWith("filter:") || // Jinja filter + name.startsWith("exptest:") // Jinja test + } + + /** + * Holds all bound and unbound variables at a specific node in the AST. + */ + case class Scope(unboundVars: Seq[TemplateVariableName], boundVars: Seq[TemplateVariableName] = Seq.empty) { + + def withBoundNames(varNames: Seq[String]): Scope = { + withBound(varNames.map(new TemplateVariableName(_, Seq.empty))) + } + + def withBound(varNames: Seq[TemplateVariableName]): Scope = { + copy(boundVars = (boundVars ++ varNames).distinct) + } + + def withUnbound(varNames: Seq[TemplateVariableName]): Scope = { + copy(unboundVars = (unboundVars ++ varNames).distinct) + } + + /** + * Adds a scope from a subsequent node. 
+ */ + def ++(scope: Scope): Scope = { + val boundVarsSet = boundVars.toSet + val boundSimpleNames = boundVars.filter(_.scope.isEmpty).map(_.name).toSet + def isBound(v: TemplateVariableName): Boolean = { + boundVarsSet.contains(v) || v.scope.headOption.exists(boundSimpleNames.contains) + } + Scope( + unboundVars = (unboundVars ++ scope.unboundVars).distinct.filterNot(isBound), + boundVars = (boundVars ++ scope.boundVars).distinct + ) + } + + } + + object Scope { + def empty: Scope = Scope(Seq.empty, Seq.empty) + } + +} + +object JinjaVariableCollector { + + // Regex for valid variable names + private val variableRegex = "[a-zA-Z_][a-zA-Z0-9_]*".r + + // Regex for scoped names of the form scope1[.scope2]*.var + private val scopedName = s"((?:$variableRegex\\.)+)($variableRegex)".r + + // Regex for method calls on a variable of the form var.method(...) + private val methodCallOnVar = s"($variableRegex)\\.$variableRegex\\(.*\\)".r + +} diff --git a/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/TransformFilters.scala b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/TransformFilters.scala new file mode 100644 index 0000000000..adcda3aaf4 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/main/scala/org/silkframework/plugins/templating/jinja/TransformFilters.scala @@ -0,0 +1,53 @@ +package org.silkframework.plugins.templating.jinja + +import com.hubspot.jinjava.interpret.{Context, JinjavaInterpreter} +import com.hubspot.jinjava.lib.filter.Filter +import org.silkframework.rule.input.Transformer +import org.silkframework.runtime.plugin.{ParameterValues, PluginContext, PluginDescription, PluginRegistry} +import org.silkframework.runtime.templating.IterableTemplateValues + +/** + * Makes transformer plugins available as Jinja filters. + */ +object TransformFilters { + + /** + * Registers all available transformers as Jinja filters. 
+ */ + def register(context: Context): Unit = { + for(transformerPlugin <- PluginRegistry.availablePlugins[Transformer]) { + if(context.getFilter(transformerPlugin.id) == null) { + context.registerFilter(new TransformFilter(transformerPlugin)) + } + } + } + + /** + * A Jinja filter that is based on a transformer. + */ + class TransformFilter(transformerPlugin: PluginDescription[Transformer]) extends Filter { + + override def getName: String = transformerPlugin.id + + override def filter(value: Any, interpreter: JinjavaInterpreter, args: String*): AnyRef = { + // Create transformer instance with parameters + implicit val pluginContext: PluginContext = PluginContext.empty + val paramValues = + for((param, value) <- transformerPlugin.parameters zip args) yield { + (param.name, value) + } + val transformer = transformerPlugin(ParameterValues.fromStringMap(paramValues.toMap)) + + // Evaluate transformer + val inputValues = value match { + case r: IterableTemplateValues => r.values + case v: Any => Seq(v.toString) + } + val transformedValues = transformer(Seq(inputValues)) + + // Return result + IterableTemplateValues.fromValues(transformedValues) + } + } + +} diff --git a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaEngineTest.scala b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaEngineTest.scala new file mode 100644 index 0000000000..3980b764ab --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaEngineTest.scala @@ -0,0 +1,189 @@ +package org.silkframework.plugins.templating.jinja + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers +import org.silkframework.runtime.templating.TemplateVariableValue +import org.silkframework.runtime.templating.exceptions.UnboundVariablesException + +import java.io.{StringWriter, Writer} +import 
scala.collection.immutable.ArraySeq +import scala.jdk.CollectionConverters.{MapHasAsJava, SeqHasAsJava} + +class JinjaEngineTest extends AnyFlatSpec with Matchers { + + behavior of "JinjaEngine" + + it should "fail if variables are not bound" in { + intercept[UnboundVariablesException]( + evaluate( + template = "{{name}} {{location}}", + values = Map("firstName"-> Seq("John")) + ) + ).missingVars.map(_.name) shouldBe Seq("name", "location") + + intercept[UnboundVariablesException]( + evaluate( + template = "{{name | lower}}", + values = Map("firstName"-> Seq("John")) + ) + ).missingVars.map(_.name) shouldBe Seq("name") + } + + it should "support transformer plugins to be used as filters" in { + evaluate( + template = "{{name | lowerCase}}", + values = Map("name"-> Seq("John")) + ) shouldBe "john" + + evaluate( + template = "{{names | removeDuplicates | concatMultiValues(', ')}}", + values = Map("names"-> Seq("John", "Max", "John")) + ) shouldBe "John, Max" + } + + it should "support combining built-in filters with DataIntegration transformer filters" in { + evaluate( + template = "{{input | lower | tokenize(',') | join('-')}}", + values = Map("input"-> Seq("A,B,C")) + ) shouldBe "a-b-c" + } + + it should "support complex templates" in { + val template = + """ + | {% for user in users %} + | {% set location, country = city, 'Germany' %} + | {{user}} is from {{location}}, {{country}} + | {% endfor %} + | + |""".stripMargin + + val values = Map( + "users"-> Seq("John", "Max"), + "city" -> Seq("Berlin") + ) + + val expectedLines = Seq( + "John is from Berlin, Germany", + "Max is from Berlin, Germany" + ) + + lines(evaluate(template, values)) shouldBe expectedLines + } + + it should "support templates with macros" in { + val template = + """ {% macro foo() %} + | {{ "hello world" }} + | {% endmacro %} + | {{ foo() }}""".stripMargin + + lines(evaluate(template, Map.empty)) shouldBe Seq("hello world") + } + + it should "support templates with macros with parameters" in { 
+ val template = + """ {% macro foo(name) %} + | Hello {{name}} + | {% endmacro %} + | {{ foo('John') }}""".stripMargin + + lines(evaluate(template, Map.empty)) shouldBe Seq("Hello John") + } + + it should "support loop cycle helper" in { + val template = + """{% for nr in nrs %} + | {{ loop.cycle('odd', 'even') }} {{nr}} + |{% endfor %}""".stripMargin + + lines(evaluate(template, Map("nrs" -> Seq("1", "2")))) shouldBe Seq("odd 1", "even 2") + } + + it should "support call and caller()" in { + val template = """{% macro renderIt(title, class='default') -%} + | {{title}} ({{class}}) [{{caller()}}] + |{%- endmacro %} + | + |{% call renderIt('Titel') %} + |caller text + |{% endcall %}""".stripMargin + + lines(evaluate(template, Map.empty)) shouldBe Seq("Titel (default) [", "caller text", "]") + } + + // FIXME: jinjava does not support filter tags yet + it should "support filter blocks" ignore { + val template = """{% filter upper %} + | to upper + |{% endfilter %}""".stripMargin + evaluate(template, Map.empty).trim shouldBe "TO UPPER" + } + + it should "set and use variables" in { + val template = + """{% set newVar = "new var" %} + |A {{newVar}} + |""".stripMargin + evaluate(template, Map.empty).trim shouldBe "A new var" + } + + it should "support to set and use nested variables" in { + val template = + """{% set nested = ({"sub": {"label": "Label"}}) %} + |A {{nested.sub.label}} + |""".stripMargin + lines(evaluate(template, Map.empty)) shouldBe Seq("A Label") + } + + it should "be able to access global functions" in { + val template = """{% for number in range(1, 3) %} + | {{number}} + |{% endfor %}""".stripMargin + lines(evaluate(template, Map.empty)) shouldBe Seq("1", "2") + } + + it should "support sorting value" in { + val template = + """ + | {% for e in entities | sort(false, false, 'order') %} + | {{ e.item }}: {{ e.order }} + | {% endfor %} + | + |""".stripMargin + + val values = Map( + "entities"-> Seq(Map("item" -> "1", "order" -> "2").asJava, Map("item" 
-> "2", "order" -> "1").asJava).asJava + ) + + val expectedLines = Seq( + "2: 1", + "1: 2" + ) + + lines(evaluateRaw(template, values)) shouldBe expectedLines + } + + private def evaluate(template: String, values: Map[String, Seq[String]]): String = { + val writer = new StringWriter() + val compileTemplate = JinjaTemplateEngine().compile(template) + val templateValues = + for((name, value) <- values.toSeq) yield { + new TemplateVariableValue(name, Seq.empty, value) + } + compileTemplate.evaluate(templateValues, writer) + writer.toString + } + + private def evaluateRaw(template: String, values: Map[String, AnyRef]): String = { + val writer = new StringWriter() + val compileTemplate = JinjaTemplateEngine().compile(template) + compileTemplate.evaluate(values, writer: Writer) + writer.toString + } + + private def lines(str: String): Seq[String] = { + ArraySeq.unsafeWrapArray(str.split("\\s*[\n\r]+\\s*").filter(_.nonEmpty)) + } + +} diff --git a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollectorTest.scala b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollectorTest.scala new file mode 100644 index 0000000000..0a5c5d1ee2 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaMethodCollectorTest.scala @@ -0,0 +1,64 @@ +package org.silkframework.plugins.templating.jinja + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers +import org.silkframework.runtime.templating.TemplateMethodUsage + +class JinjaMethodCollectorTest extends AnyFlatSpec with Matchers { + + behavior of "JinjaMethodCollector" + + it should "collect a method call in an expression node" in { + collect("""{{ row.uri("subject") }}""", "row") shouldBe Seq(TemplateMethodUsage("uri", "subject")) + } + + it should "collect a method call in an if tag helper" in { + 
collect("""{% if row.exists("somePath") %}yes{% endif %}""", "row") shouldBe Seq(TemplateMethodUsage("exists", "somePath")) + } + + it should "collect multiple method calls across nodes" in { + collect( + """ + |{{ row.uri("subject") }} + |{% if row.exists("somePath") %} + | {{ row.plainLiteral("somePath") }} + | {{ row.rawUnsafe("trustedValuePath") }} + |{% endif %} + |""".stripMargin, "row" + ) shouldBe Seq( + TemplateMethodUsage("uri", "subject"), + TemplateMethodUsage("exists", "somePath"), + TemplateMethodUsage("plainLiteral", "somePath"), + TemplateMethodUsage("rawUnsafe", "trustedValuePath") + ) + } + + it should "only collect methods on the requested variable" in { + collect( + """{{ row.uri("subject") }} {{ other.uri("subject") }}""", + "row" + ) shouldBe Seq(TemplateMethodUsage("uri", "subject")) + } + + it should "collect method calls using single-quoted parameters" in { + collect("""{{ row.uri('subject') }}""", "row") shouldBe Seq(TemplateMethodUsage("uri", "subject")) + } + + it should "return an empty sequence when no methods are called on the variable" in { + collect("""INSERT DATA { "hello" }""", "row") shouldBe Seq.empty + } + + it should "return an empty sequence when the variable is not present" in { + collect("""{{ other.uri("subject") }}""", "row") shouldBe Seq.empty + } + + it should "not collect method calls without a string constant parameter" in { + // row.method(var) — non-constant parameter, should not be collected + collect("""{{ row.uri(subject) }}""", "row") shouldBe Seq.empty + } + + private def collect(template: String, variableName: String): Seq[TemplateMethodUsage] = { + val node = JinjaTemplateEngine().compile(template).node + new JinjaMethodCollector().collect(node, variableName) + } +} \ No newline at end of file diff --git a/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala 
b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala new file mode 100644 index 0000000000..f2ce0c231a --- /dev/null +++ b/silk-plugins/silk-plugins-templating-jinja/src/test/scala/org/silkframework/plugins/templating/jinja/JinjaVariableCollectorTest.scala @@ -0,0 +1,136 @@ +package org.silkframework.plugins.templating.jinja + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +class JinjaVariableCollectorTest extends AnyFlatSpec with Matchers { + + behavior of "JinjaVariableCollector" + + it should "collect plain variable replacements" in { + collect("This is {{name}} from {{city}}.") shouldBe Seq("name", "city") + } + + it should "collect variables in conditions" in { + collect( + """ + | {% if title == "Mayor" %} + | This is the Mayor. + | {% else %} + | This is {{name}}. + | {% endif %} + |""".stripMargin) shouldBe Seq("title", "name") + + collect( + """ + | {% if var1 == "1" %} + | Case 1 + | {% elif var2 == "2" %} + | Case 2 + | {% if var3 == "3" %} + | Case 3 + | {% elif var4 == "4" %} + | Case 4 + | {% endif %} + | {% endif %} + |""".stripMargin) shouldBe Seq("var1", "var2", "var3", "var4") + } + + it should "collect variables inside expressions" in { + val variables = collect("{{names | removeDuplicates | concatMultiValues(', ')}}") + variables shouldBe Seq("names") + } + + it should "collect variables inside do statements" in { + val variables = collect("{% do name %}") + variables shouldBe Seq("name") + } + + it should "collect variables in for-loops" in { + collect( + """ + | {% for user in users %} + | This is {{user}} at index {{loop.index}}. 
+ | {% endfor %} + |""".stripMargin) shouldBe Seq("users") + + collect( + """ + | {% for user in users %} + | {{ inputs | getValueByIndex(loop.index) }} + | {% endfor %} + |""".stripMargin) shouldBe Seq("users", "inputs") + } + + it should "collect variables in set expressions" in { + collect( + """ + | {% set location, country, timestamp = city, 'Germany', time %} + | {{user}} is from {{location}}, {{country}} ({{timestamp}}) + | + |""".stripMargin) shouldBe Seq("city", "time", "user") + collect( + """ + | {% for user in users %} + | {% set location, country, timestamp = city, 'Germany', time %} + | {{user}} is from {{location}}, {{country}} ({{timestamp}}) + | {% endfor %} + | + |""".stripMargin) shouldBe Seq("users", "city", "time") + } + + it should "collect variables in tests" in { + collect( + """ + | {% if title is defined %} + | Is defined + | {% endif %} + |""".stripMargin) shouldBe Seq("title") + } + + it should "collect scoped variables in simple expressions" in { + collect("This is {{project.name}} from {{global.city}}.") shouldBe Seq("project.name", "global.city") + } + + it should "collect scoped variables in complex expressions" in { + collect("{{ input.parameters.graph ~ \"/data\" }}") shouldBe Seq("input.parameters.graph") + collect("{{ a.b ~ c.d }}") shouldBe Seq("a.b", "c.d") + } + + it should "collect variables used in method calls" in { + collect( + """ + | {{ row.uri("subject") }} + | {% if row.exists("somePath") %} + | Plain: {{ row.plainLiteral("somePath") }} + | Raw: {{ row.rawUnsafe("trustedValuePath") }} + | {% endif %} + |""".stripMargin) shouldBe Seq("row") + } + + it should "don't fail on empty expressions" in { + collect("{{ }}".stripMargin) shouldBe Seq.empty + } + + it should "don't collect bound variables in macros" in { + collect( + """ {% macro foo(name) %} + | Hello {{name}} + | {% endmacro %} + | {{ foo('John') }}""".stripMargin) shouldBe Seq() + } + + it should "collect unbound variables in macros" in { + collect( + """ {% 
macro foo(street, number) %} + | {{street}} {{number}}, {{country}} + | {% endmacro %} + | {{ foo('Hainstraße', '8') }}""".stripMargin) shouldBe Seq("country") + } + + private def collect(template: String): Seq[String] = { + val node = JinjaTemplateEngine().compile(template).node + new JinjaVariableCollector().collect(node).unboundVars.map(_.scopedName) + } + +} diff --git a/silk-plugins/silk-plugins-templating-velocity/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule b/silk-plugins/silk-plugins-templating-velocity/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule new file mode 100644 index 0000000000..364210cf06 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-velocity/src/main/resources/META-INF/services/org.silkframework.runtime.plugin.PluginModule @@ -0,0 +1 @@ +org.silkframework.plugins.templating.velocity.VelocityTemplatingPlugins diff --git a/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala new file mode 100644 index 0000000000..3ddad7ef76 --- /dev/null +++ b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplateEngine.scala @@ -0,0 +1,147 @@ +package org.silkframework.plugins.templating.velocity + +import org.apache.velocity.VelocityContext +import org.apache.velocity.context.Context +import org.apache.velocity.exception.MethodInvocationException +import org.apache.velocity.runtime.parser.node._ +import org.apache.velocity.{Template => VelocityTemplate} +import org.apache.velocity.runtime.RuntimeSingleton +import org.silkframework.runtime.plugin.annotations.Plugin +import org.silkframework.runtime.templating.exceptions.TemplateEvaluationException +import 
org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, TemplateEngine, TemplateMethodUsage, TemplateVariableName, TemplateVariableValue} + +import java.io.{StringReader, StringWriter, Writer} + +/** + * A general-purpose templating engine based on Apache Velocity. + */ +@Plugin( + id = VelocityTemplateEngine.id, + label = "Velocity (deprecated)", + description = "A templating engine based on Apache Velocity.", + deprecation = "This template engine is deprecated. Please use the 'Jinja' template engine instead." +) +case class VelocityTemplateEngine() extends TemplateEngine { + + override def compile(templateString: String): VelocityCompiledTemplate = { + new VelocityCompiledTemplate(templateString) + } +} + +object VelocityTemplateEngine { + final val id = "velocity" + + /** Creates a Velocity template based on the given template string. */ + def createTemplate(templateString: String): VelocityTemplate = { + val service = RuntimeSingleton.getRuntimeServices + service.addProperty("runtime.strict_mode.enable", true) + val reader = new StringReader(templateString) + val template = new VelocityTemplate() + template.setRuntimeServices(service) + template.setData(service.parse(reader, template)) + template.initDocument() + template + } + + /** Renders the template with the given context. */ + def renderTemplate(template: VelocityTemplate, context: Context): String = { + val writer = new StringWriter() + try { + template.merge(context, writer) + } catch { + case ex: MethodInvocationException => + ex.getCause match { + case cause: TemplateEvaluationException => throw cause + case _ => throw new TemplateEvaluationException(ex.getMessage, Some(ex)) + } + } + writer.toString + } +} + +/** + * A compiled template based on Apache Velocity. 
+ */ +class VelocityCompiledTemplate(val templateString: String) extends CompiledTemplate { + + private val velocityTemplate = VelocityTemplateEngine.createTemplate(templateString) + + override lazy val variables: Option[Seq[TemplateVariableName]] = { + Some(extractVariableReferences(velocityTemplate.getData.asInstanceOf[SimpleNode]) + .map(name => new TemplateVariableName(name, Seq.empty)).distinct) + } + + override def evaluate(values: Map[String, AnyRef], writer: Writer): Unit = { + val context = new VelocityContext() + values.foreach { case (k, v) => context.put(k, v) } + writer.write(VelocityTemplateEngine.renderTemplate(velocityTemplate, context)) + } + + override def evaluate(values: Seq[TemplateVariableValue], writer: Writer, evaluationConfig: EvaluationConfig): Unit = { + evaluate(convertValues(values), writer) + } + + /** Extracts top-level variable references from the Velocity AST. */ + private def extractVariableReferences(node: Node): List[String] = { + node match { + case ref: ASTReference => + List(ref.getRootString) + case other: SimpleNode => + (0 until other.jjtGetNumChildren()).flatMap(idx => extractVariableReferences(other.jjtGetChild(idx))).toList + case _ => + List.empty + } + } + + override def methodUsages(variableName: String): Seq[TemplateMethodUsage] = { + velocityTemplate.getData match { + case simpleNode: SimpleNode => + retrieveMethodUsages(simpleNode, variableName) + case _ => + throw new RuntimeException(s"Unexpected error: Cannot retrieve $variableName object method usages from Velocity template.") + } + } + + /** Retrieves method usages on a given variable from the AST. 
*/ + private def retrieveMethodUsages(simpleNode: Node, varName: String): List[TemplateMethodUsage] = { + simpleNode match { + case astMethod: ASTMethod => + astReferenceName(astMethod.jjtGetParent()) match { + case Some(v) if v == varName && hasSingleStringParameter(astMethod) => + val parameterValue = astMethod.jjtGetChild(1).jjtGetChild(0).asInstanceOf[ASTStringLiteral].literal().stripPrefix("\"").stripSuffix("\"") + List(TemplateMethodUsage(astMethod.getMethodName, parameterValue)) + case _ => + List.empty + } + case other: SimpleNode => + retrieveChildMethodUsages(other, varName) + } + } + + /** Checks that there is a single string constant as parameter. */ + private def hasSingleStringParameter(astMethod: ASTMethod): Boolean = { + astMethod.jjtGetNumChildren() == 2 && { + val parameter = astMethod.jjtGetChild(1) + parameter.isInstanceOf[ASTExpression] && + parameter.jjtGetNumChildren() == 1 && + parameter.jjtGetChild(0).isInstanceOf[ASTStringLiteral] && + parameter.jjtGetChild(0).asInstanceOf[ASTStringLiteral].isConstant + } + } + + private def astReferenceName(node: Node): Option[String] = { + node match { + case reference: ASTReference => + Some(reference.getRootString) + case _ => + None + } + } + + private def retrieveChildMethodUsages(other: SimpleNode, varName: String): List[TemplateMethodUsage] = { + val childPaths = for (idx <- 0 until other.jjtGetNumChildren()) yield { + retrieveMethodUsages(other.jjtGetChild(idx), varName) + } + childPaths.fold(List.empty[TemplateMethodUsage])((a, b) => a ::: b) + } +} diff --git a/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplatingPlugins.scala b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplatingPlugins.scala new file mode 100644 index 0000000000..e23e204dab --- /dev/null +++ 
b/silk-plugins/silk-plugins-templating-velocity/src/main/scala/org/silkframework/plugins/templating/velocity/VelocityTemplatingPlugins.scala @@ -0,0 +1,7 @@ +package org.silkframework.plugins.templating.velocity + +import org.silkframework.runtime.plugin.{AnyPlugin, PluginModule} + +class VelocityTemplatingPlugins extends PluginModule { + override def pluginClasses: Seq[Class[_ <: AnyPlugin]] = Seq(classOf[VelocityTemplateEngine]) +} diff --git a/silk-rules/src/main/scala/org/silkframework/rule/plugins/RulePlugins.scala b/silk-rules/src/main/scala/org/silkframework/rule/plugins/RulePlugins.scala index c754317419..00e05c7631 100644 --- a/silk-rules/src/main/scala/org/silkframework/rule/plugins/RulePlugins.scala +++ b/silk-rules/src/main/scala/org/silkframework/rule/plugins/RulePlugins.scala @@ -38,6 +38,7 @@ import org.silkframework.rule.plugins.transformer.numeric._ import org.silkframework.rule.plugins.transformer.replace.{MapTransformer, MapTransformerWithDefaultInput, RegexReplaceTransformer, ReplaceTransformer} import org.silkframework.rule.plugins.transformer.selection.{CoalesceTransformer, RegexSelectTransformer} import org.silkframework.rule.plugins.transformer.sequence.{GetValueByIndexTransformer, SortTransformer, ValuesToIndexesTransformer} +import org.silkframework.rule.plugins.transformer.sparql.{EscapeLiteralTransformer, EscapeMultilineLiteralTransformer, ValidateUriTransformer} import org.silkframework.rule.plugins.transformer.substring._ import org.silkframework.rule.plugins.transformer.tokenization.{CamelCaseTokenizer, Tokenizer} import org.silkframework.rule.plugins.transformer.validation._ @@ -148,6 +149,10 @@ class RulePlugins extends PluginModule { // Selection classOf[RegexSelectTransformer] :: classOf[CoalesceTransformer] :: + // SPARQL + classOf[ValidateUriTransformer] :: + classOf[EscapeLiteralTransformer] :: + classOf[EscapeMultilineLiteralTransformer] :: Nil private def measures: List[Class[_ <: AnyPlugin]] = diff --git 
a/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformer.scala b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformer.scala new file mode 100644 index 0000000000..14b233f24f --- /dev/null +++ b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformer.scala @@ -0,0 +1,52 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.annotations.{TransformExample, TransformExamples} +import org.silkframework.rule.input.SimpleTransformer +import org.silkframework.runtime.plugin.annotations.Plugin + +@Plugin( + id = "escape_literal", + categories = Array("SPARQL"), + label = "Escape SPARQL plain literal", + description = "Escapes a value so it can be safely used inside a SPARQL short-form string literal. " + + "Escapes backslashes, quotes, newlines, carriage returns and tabs. " + + "The returned value does not include enclosing quotation marks." 
+) +@TransformExamples(Array( + new TransformExample( + input1 = Array("simple value"), + output = Array("simple value") + ), + new TransformExample( + input1 = Array("with \"quotes\""), + output = Array("with \\\"quotes\\\"") + ), + new TransformExample( + input1 = Array("back\\slash"), + output = Array("back\\\\slash") + ), + new TransformExample( + input1 = Array("line1\nline2"), + output = Array("line1\\nline2") + ) +)) +case class EscapeLiteralTransformer() extends SimpleTransformer { + + override def evaluate(value: String): String = { + val sb = new StringBuilder(value.length) + var i = 0 + while (i < value.length) { + value.charAt(i) match { + case '\\' => sb.append("\\\\") + case '"' => sb.append("\\\"") + case '\'' => sb.append("\\'") + case '\n' => sb.append("\\n") + case '\r' => sb.append("\\r") + case '\t' => sb.append("\\t") + case c => sb.append(c) + } + i += 1 + } + sb.toString + } +} \ No newline at end of file diff --git a/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformer.scala b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformer.scala new file mode 100644 index 0000000000..21577e9620 --- /dev/null +++ b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformer.scala @@ -0,0 +1,55 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.annotations.{TransformExample, TransformExamples} +import org.silkframework.rule.input.SimpleTransformer +import org.silkframework.runtime.plugin.annotations.Plugin + +import scala.util.matching.Regex + +@Plugin( + id = "escape_multiline_literal", + categories = Array("SPARQL"), + label = "Escape SPARQL multiline literal", + description = "Escapes a value so it can be safely used inside a SPARQL triple-quoted string literal " + + "(`\"\"\"...\"\"\"` or `'''...'''`). 
Escapes backslashes and breaks any run of three or more " + + "consecutive single or double quotes. Individual quotes and newlines are preserved. " + + "The returned value does not include enclosing quotation marks." +) +@TransformExamples(Array( + new TransformExample( + input1 = Array("simple\nvalue"), + output = Array("simple\nvalue") + ), + new TransformExample( + input1 = Array("with \"quote\""), + output = Array("with \"quote\"") + ), + new TransformExample( + input1 = Array("back\\slash"), + output = Array("back\\\\slash") + ), + new TransformExample( + input1 = Array("triple \"\"\" quotes"), + output = Array("triple \\\"\\\"\\\" quotes") + ), + new TransformExample( + input1 = Array("triple ''' quotes"), + output = Array("triple \\'\\'\\' quotes") + ) +)) +case class EscapeMultilineLiteralTransformer() extends SimpleTransformer { + + override def evaluate(value: String): String = { + val withBackslashes = value.replace("\\", "\\\\") + val noTripleDq = EscapeMultilineLiteralTransformer.dqRun3.replaceAllIn(withBackslashes, + m => Regex.quoteReplacement("\\\"" * m.matched.length)) + val noTripleSq = EscapeMultilineLiteralTransformer.sqRun3.replaceAllIn(noTripleDq, + m => Regex.quoteReplacement("\\'" * m.matched.length)) + noTripleSq + } +} + +object EscapeMultilineLiteralTransformer { + private val dqRun3: Regex = "\"{3,}".r + private val sqRun3: Regex = "'{3,}".r +} \ No newline at end of file diff --git a/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformer.scala b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformer.scala new file mode 100644 index 0000000000..e4dce73bbd --- /dev/null +++ b/silk-rules/src/main/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformer.scala @@ -0,0 +1,45 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.annotations.{TransformExample, TransformExamples} +import 
org.silkframework.rule.input.SimpleTransformer +import org.silkframework.runtime.plugin.annotations.Plugin +import org.silkframework.runtime.validation.ValidationException + +import java.net.URI +import scala.util.Try + +@Plugin( + id = "validate_uri", + categories = Array("Validation", "SPARQL"), + label = "Validate URI", + description = "Validates that the input is a valid absolute IRI and returns it unchanged. " + + "Throws a validation error if the input is not a valid IRI. " +) +@TransformExamples(Array( + new TransformExample( + input1 = Array("http://example.org/entity1"), + output = Array("http://example.org/entity1") + ), + new TransformExample( + input1 = Array("urn:example:1"), + output = Array("urn:example:1") + ), + new TransformExample( + input1 = Array("not a uri"), + throwsException = classOf[ValidationException] + ), + new TransformExample( + input1 = Array(""), + throwsException = classOf[ValidationException] + ) +)) +case class ValidateUriTransformer() extends SimpleTransformer { + + override def evaluate(value: String): String = { + Try(new URI(value)) match { + case scala.util.Success(uri) if uri.isAbsolute => value + case _ => + throw new ValidationException(s"'$value' is not a valid absolute IRI") + } + } +} \ No newline at end of file diff --git a/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformerTest.scala b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformerTest.scala new file mode 100644 index 0000000000..19c39f81cd --- /dev/null +++ b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeLiteralTransformerTest.scala @@ -0,0 +1,5 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.test.TransformerTest + +class EscapeLiteralTransformerTest extends TransformerTest[EscapeLiteralTransformer] \ No newline at end of file diff --git 
a/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformerTest.scala b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformerTest.scala new file mode 100644 index 0000000000..3bde03cd73 --- /dev/null +++ b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/EscapeMultilineLiteralTransformerTest.scala @@ -0,0 +1,5 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.test.TransformerTest + +class EscapeMultilineLiteralTransformerTest extends TransformerTest[EscapeMultilineLiteralTransformer] \ No newline at end of file diff --git a/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformerTest.scala b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformerTest.scala new file mode 100644 index 0000000000..719896a03f --- /dev/null +++ b/silk-rules/src/test/scala/org/silkframework/rule/plugins/transformer/sparql/ValidateUriTransformerTest.scala @@ -0,0 +1,5 @@ +package org.silkframework.rule.plugins.transformer.sparql + +import org.silkframework.rule.test.TransformerTest + +class ValidateUriTransformerTest extends TransformerTest[ValidateUriTransformer] \ No newline at end of file diff --git a/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala b/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala index 90f3fc9a55..f48e4a9be5 100644 --- a/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala +++ b/silk-workbench/silk-workbench-core/app/controllers/workspaceApi/coreApi/VariableTemplateApi.scala @@ -15,7 +15,7 @@ import io.swagger.v3.oas.annotations.tags.Tag import io.swagger.v3.oas.annotations.{Operation, Parameter} import org.silkframework.runtime.templating.exceptions._ import 
org.silkframework.runtime.templating.operations.{DeleteVariableModification, UpdateVariableModification, UpdateVariablesModification} -import org.silkframework.runtime.templating.{GlobalTemplateVariables, TemplateVariable, TemplateVariables} +import org.silkframework.runtime.templating.{GlobalTemplateVariables, TemplateVariable, TemplateVariableScopes, TemplateVariables} import org.silkframework.runtime.validation.BadUserInputException import org.silkframework.serialization.json.JsonHelpers import org.silkframework.workspace.WorkspaceFactory @@ -334,7 +334,7 @@ class VariableTemplateApi @Inject()() extends InjectedController with UserContex val dependencyErrors = ex.issues.collect { case TemplateVariableEvaluationException(dependentVar, unboundEx: UnboundVariablesException) => - (dependentVar.name, unboundEx.missingVars.filter(_.scope == "project").map(_.name)) + (dependentVar.name, unboundEx.missingVars.filter(_.scope == TemplateVariableScopes.project).map(_.name)) }.filter(_._2.nonEmpty).toMap if(dependencyErrors.nonEmpty) { throw new CannotReorderVariablesException(dependencyErrors) @@ -446,8 +446,7 @@ object VariableTemplateApi { ) isSensitive: Boolean, @Schema( - description = "The scope of the variable.", - example = "project", + description = "The scope of the variable, e.g. 
\"project\".", requiredMode = RequiredMode.REQUIRED ) scope: String) { @@ -455,13 +454,13 @@ object VariableTemplateApi { if (value.isEmpty && template.isEmpty) { throw new BadUserInputException("Either the variable value or its template has to be defined.") } - TemplateVariable(name, value.getOrElse(""), template, description, isSensitive, scope) + TemplateVariable(name, value.getOrElse(""), template, description, isSensitive, scope.split('.').toIndexedSeq) } } object TemplateVariableJson { def apply(variable: TemplateVariable): TemplateVariableJson = { - TemplateVariableJson(variable.name, Some(variable.value), variable.template, variable.description, variable.isSensitive, variable.scope) + TemplateVariableJson(variable.name, Some(variable.value), variable.template, variable.description, variable.isSensitive, variable.scope.mkString(".")) } } diff --git a/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala b/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala index cd82d2f811..e333839cf7 100644 --- a/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala +++ b/silk-workbench/silk-workbench-core/app/org/silkframework/workbench/utils/ErrorResult.scala @@ -66,6 +66,10 @@ object ErrorResult { ex match { case requestEx: RequestException with JsonRequestException => requestEx.additionalJson + case requestEx: RequestException => + JsObject( + requestEx.additionalData.map(data => data._1 -> JsString(data._2.toString)).toSeq + ) case _ => Json.obj() } diff --git a/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala b/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala index 02f340804e..c770be7516 100644 --- a/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala +++ b/silk-workbench/silk-workbench-rules/app/controllers/transform/PeakTransformApi.scala @@ -11,13 
+11,13 @@ import io.swagger.v3.oas.annotations.parameters.RequestBody import io.swagger.v3.oas.annotations.responses.ApiResponse import io.swagger.v3.oas.annotations.tags.Tag import io.swagger.v3.oas.annotations.{Operation, Parameter} -import org.silkframework.config.{Prefixes, TaskSpec} +import org.silkframework.config.{Prefixes, Task, TaskSpec} import org.silkframework.dataset.DatasetSpec.GenericDatasetSpec import org.silkframework.dataset._ import org.silkframework.dataset.rdf.RdfDataset import org.silkframework.entity._ import org.silkframework.entity.paths.{Path, UntypedPath} -import org.silkframework.plugins.dataset.rdf.executors.{LocalSparqlSelectExecutor, LocalSparqlSelectIterator} +import org.silkframework.plugins.dataset.rdf.executors.LocalSparqlSelectExecutor import org.silkframework.plugins.dataset.rdf.tasks.SparqlSelectCustomTask import org.silkframework.rule.TransformSpec.RuleSchemata import org.silkframework.rule.{ComplexUriMapping, TaskContext, TransformRule, TransformSpec} @@ -238,8 +238,9 @@ class PeakTransformApi @Inject() () extends InjectedController with UserContextA } else { val datasetTask = project.task[GenericDatasetSpec](sparqlDataset) datasetTask.data.plugin match { - case rdfDataset: RdfDataset with Dataset => - val entities = new LocalSparqlSelectIterator(sparqlSelectTask, rdfDataset.sparqlEndpoint, maxTryEntities, executionReportUpdater = None) + case _: RdfDataset with Dataset => + val executor = LocalSparqlSelectExecutor() + val entities = executor.executeOnSparqlEndpoint(sparqlSelectTask, datasetTask.asInstanceOf[Task[_ <: DatasetSpec[RdfDataset]]], None, maxTryEntities, executionReportUpdater = None) val entityDatasource = EntityDatasource(datasetTask, entities, sparqlSelectTask.outputSchema) try { entityDatasource.peak(ruleSchemata.inputSchema, maxTryEntities).use { exampleEntities => diff --git a/silk-workbench/silk-workbench-workspace/test/controllers/workspace/TaskApiTest.scala 
b/silk-workbench/silk-workbench-workspace/test/controllers/workspace/TaskApiTest.scala index 607b731a0f..756ed0575c 100644 --- a/silk-workbench/silk-workbench-workspace/test/controllers/workspace/TaskApiTest.scala +++ b/silk-workbench/silk-workbench-workspace/test/controllers/workspace/TaskApiTest.scala @@ -362,7 +362,7 @@ class TaskApiTest extends PlaySpec with IntegrationTestTrait with Matchers { p.addAnyTask(sparqlSelect, SparqlSelectCustomTask("SELECT * WHERE {?s ?p ?o}", optionalInputDataset = SparqlEndpointDatasetParameter(inMemoryDataset))) p.addAnyTask(sparqlDataset, DatasetSpec(SparqlDataset("http://endpoint"))) // Check tasks - taskValuesWithLabel(sparqlSelect).filter(_._2.isDefined) mustBe Seq(JsString(inMemoryDataset) -> Some(inMemoryDatasetLabel)) + taskValuesWithLabel(sparqlSelect).filter(_._2.isDefined) must contain theSameElementsAs Seq(JsString("jinja") -> Some("Jinja"), JsString(inMemoryDataset) -> Some(inMemoryDatasetLabel)) taskValuesWithLabel(sparqlDataset).filter(_._2.isDefined) mustBe Seq(JsString("parallel") -> Some("parallel")) taskValuesWithLabel(workflowId) // Just check that it returns anything taskValuesWithLabel(linkTaskId) diff --git a/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala b/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala index 4fd9a9aa84..46845c36bd 100644 --- a/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala +++ b/silk-workbench/silk-workbench-workspace/test/controllers/workspaceApi/ProjectTaskApiTest.scala @@ -94,7 +94,7 @@ class ProjectTaskApiTest extends AnyFlatSpec with SingleProjectWorkspaceProvider val datasetLabel = "In-memory dataset" val customLabel = "Custom SPARQL Update" val transformTask = "transformInContext" - project.addTask(customId, SparqlUpdateCustomTask("insert data {${}

}"), MetaData(Some(customLabel))) + project.addTask(customId, SparqlUpdateCustomTask("INSERT DATA { <{{ input.entity.PROP_FROM_ENTITY_SCHEMA1 }}>

}"), MetaData(Some(customLabel))) project.addTask(datasetId, DatasetSpec(InMemoryDataset()), metaData = MetaData(Some(datasetLabel))) project.addTask(transformTask, TransformSpec(DatasetSelection(datasetId), output = IdentifierOptionParameter(Some(Identifier(customId))))) val TaskContextResponse(inputTasks, outputTasks, originalInputs, originalOutputs) = taskContext(projectId, transformTask, WorkflowTaskContext( diff --git a/silk-workspace/src/main/scala/org/silkframework/workspace/ProjectTemplateVariablesManager.scala b/silk-workspace/src/main/scala/org/silkframework/workspace/ProjectTemplateVariablesManager.scala index 5ecb077291..aca3a89aa5 100644 --- a/silk-workspace/src/main/scala/org/silkframework/workspace/ProjectTemplateVariablesManager.scala +++ b/silk-workspace/src/main/scala/org/silkframework/workspace/ProjectTemplateVariablesManager.scala @@ -19,7 +19,7 @@ class ProjectTemplateVariablesManager(serializer: TemplateVariablesSerializer, l /** * The available variable scopes. */ - def scopes: Set[String] = GlobalTemplateVariables.scopes + projectScope + def scopes: Set[Seq[String]] = GlobalTemplateVariables.scopes + projectScope /** * Retrieves all template variables. diff --git a/silk-workspace/src/main/scala/org/silkframework/workspace/activity/workflow/LocalWorkflowExecutor.scala b/silk-workspace/src/main/scala/org/silkframework/workspace/activity/workflow/LocalWorkflowExecutor.scala index 00becbbea3..67764dfb44 100644 --- a/silk-workspace/src/main/scala/org/silkframework/workspace/activity/workflow/LocalWorkflowExecutor.scala +++ b/silk-workspace/src/main/scala/org/silkframework/workspace/activity/workflow/LocalWorkflowExecutor.scala @@ -375,7 +375,7 @@ case class LocalWorkflowExecutor(workflowTask: ProjectTask[Workflow], () } catch { case NonFatal(ex) => - throw WorkflowExecutionException(s"Exception occurred while writing to dataset '${resolvedDataset.label()}'. 
Cause: " + ex.getMessage, Some(ex)) + throw WorkflowExecutionException(s"Failed to write to dataset '${resolvedDataset.label()}': " + ex.getMessage, Some(ex)) } } diff --git a/silk-workspace/src/main/scala/org/silkframework/workspace/io/WorkspaceIO.scala b/silk-workspace/src/main/scala/org/silkframework/workspace/io/WorkspaceIO.scala index ebe77e3dee..6150bbc312 100644 --- a/silk-workspace/src/main/scala/org/silkframework/workspace/io/WorkspaceIO.scala +++ b/silk-workspace/src/main/scala/org/silkframework/workspace/io/WorkspaceIO.scala @@ -6,7 +6,7 @@ import org.silkframework.rule.{LinkSpec, TransformSpec} import org.silkframework.runtime.activity.UserContext import org.silkframework.runtime.plugin.PluginContext import org.silkframework.runtime.resource.ResourceManager -import org.silkframework.runtime.templating.{CombinedTemplateVariablesReader, GlobalTemplateVariables, InMemoryTemplateVariablesReader, TemplateVariables} +import org.silkframework.runtime.templating.{CombinedTemplateVariablesReader, GlobalTemplateVariables, InMemoryTemplateVariablesReader, TemplateVariableScopes, TemplateVariables} import org.silkframework.util.Identifier import org.silkframework.workspace.activity.workflow.Workflow import org.silkframework.workspace.resources.ResourceRepository @@ -88,7 +88,7 @@ object WorkspaceIO { prefixes: Prefixes, variables: TemplateVariables) (implicit userContext: UserContext): Unit = { - val variablesReader = CombinedTemplateVariablesReader(Seq(GlobalTemplateVariables, InMemoryTemplateVariablesReader(variables, Set("project")))) + val variablesReader = CombinedTemplateVariablesReader(Seq(GlobalTemplateVariables, InMemoryTemplateVariablesReader(variables, Set(TemplateVariableScopes.project)))) implicit val inputContext: PluginContext = PluginContext(resources = inputResources, prefixes = prefixes, user = userContext, templateVariables = variablesReader) for(taskTry <- inputWorkspace.readTasks[T](projectName)) { taskTry.taskOrError match { diff --git 
a/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala b/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala index 8cc03e705c..f3838ff3da 100644 --- a/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala +++ b/silk-workspace/src/test/scala/org/silkframework/workspace/WorkspaceProviderTestTrait.scala @@ -20,7 +20,7 @@ import org.silkframework.runtime.activity.{SimpleUserContext, UserContext} import org.silkframework.runtime.plugin.annotations.Plugin import org.silkframework.runtime.plugin._ import org.silkframework.runtime.resource.ResourceNotFoundException -import org.silkframework.runtime.templating.{TemplateVariable, TemplateVariables} +import org.silkframework.runtime.templating.{TemplateVariable, TemplateVariableScopes, TemplateVariables} import org.silkframework.runtime.users.DefaultUserManager import org.silkframework.util.{Identifier, MockitoSugar, Uri} import org.silkframework.workspace.WorkspaceProviderTestPlugins.{FailingCustomTask, FailingTaskException} @@ -690,9 +690,9 @@ trait WorkspaceProviderTestTrait extends AnyFlatSpec with Matchers with MockitoS // Add variables and read again val templateVariables1 = TemplateVariables(Seq( - TemplateVariable("myVar1", "myValue1", None, None, isSensitive = false, "project"), - TemplateVariable("myVar2", "myValue2", None, Some("test description"), isSensitive = true, "project"), - TemplateVariable("myVar3", "myValue2b", Some("{{project.myVar2}}b"), None, isSensitive = true, "project") + TemplateVariable("myVar1", "myValue1", None, None, isSensitive = false, TemplateVariableScopes.project), + TemplateVariable("myVar2", "myValue2", None, Some("test description"), isSensitive = true, TemplateVariableScopes.project), + TemplateVariable("myVar3", "myValue2b", Some("{{project.myVar2}}b"), None, isSensitive = true, TemplateVariableScopes.project) )) variables.putVariables(templateVariables1) refreshTest { @@ 
-701,9 +701,9 @@ trait WorkspaceProviderTestTrait extends AnyFlatSpec with Matchers with MockitoS // Modify variables and read again val templateVariables2 = TemplateVariables(Seq( - TemplateVariable("myVar2", "myValue2", None, Some("test description 2"), isSensitive = true, "project"), - TemplateVariable("myVar4", "myValue2b", Some("{{project.myVar2}}b"), None, isSensitive = true, "project"), - TemplateVariable("myVar1", "myValue1", None, None, isSensitive = false, "project") + TemplateVariable("myVar2", "myValue2", None, Some("test description 2"), isSensitive = true, TemplateVariableScopes.project), + TemplateVariable("myVar4", "myValue2b", Some("{{project.myVar2}}b"), None, isSensitive = true, TemplateVariableScopes.project), + TemplateVariable("myVar1", "myValue1", None, None, isSensitive = false, TemplateVariableScopes.project) )) variables.putVariables(templateVariables2) refreshTest { diff --git a/silk-workspace/src/test/scala/org/silkframework/workspace/xml/XmlZipProjectMarshalingTest.scala b/silk-workspace/src/test/scala/org/silkframework/workspace/xml/XmlZipProjectMarshalingTest.scala index c6fc831586..0059f75c06 100644 --- a/silk-workspace/src/test/scala/org/silkframework/workspace/xml/XmlZipProjectMarshalingTest.scala +++ b/silk-workspace/src/test/scala/org/silkframework/workspace/xml/XmlZipProjectMarshalingTest.scala @@ -11,7 +11,7 @@ import org.silkframework.runtime.activity.UserContext import org.silkframework.runtime.plugin.annotations.Plugin import org.silkframework.runtime.plugin.{PluginContext, PluginRegistry, TestPluginContext} import org.silkframework.runtime.resource._ -import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, InMemoryTemplateVariablesReader, TemplateEngine, TemplateVariableValue} +import org.silkframework.runtime.templating.{CompiledTemplate, EvaluationConfig, InMemoryTemplateVariablesReader, TemplateEngine, TemplateVariableScopes, TemplateVariableValue} import org.silkframework.util.{ConfigTestTrait, 
Uri} import org.silkframework.workspace.resources.InMemoryResourceRepository import org.silkframework.workspace.{InMemoryWorkspaceProvider, LoadedTask, ProjectConfig, Workspace} @@ -106,7 +106,7 @@ class XmlZipProjectMarshalingTest extends AnyFlatSpec with Matchers with ConfigT variables.map("linkLimitTimesTen").value shouldBe "10000" implicit val pluginContext: PluginContext = TestPluginContext(prefixes = Prefixes.default, resources = resources, - templateVariables = InMemoryTemplateVariablesReader(variables, Set("project"))) + templateVariables = InMemoryTemplateVariablesReader(variables, Set(TemplateVariableScopes.project))) // Datasets val datasets = workspace.provider.readTasks[GenericDatasetSpec](projectName)