diff --git a/silk-core/src/main/resources/logback.xml b/silk-core/src/main/resources/logback.xml index c43a5e2688..5d96b1d77d 100644 --- a/silk-core/src/main/resources/logback.xml +++ b/silk-core/src/main/resources/logback.xml @@ -1,9 +1,9 @@ - + - ${elds.home}/var/log/dataintegration.log + ${logDir}/dataintegration.log %d{yyyy-MM-dd'T'HH:mm:ss.SSSX,UTC} %-5level %logger{15} - %message%n%xException @@ -20,7 +20,7 @@ - /tmp/spark.log + ${logDir}/spark.log /spark.%d{yyyyMMdd}.log.zip diff --git a/silk-core/src/main/resources/reference.conf b/silk-core/src/main/resources/reference.conf index e495faf1e4..ec15015602 100644 --- a/silk-core/src/main/resources/reference.conf +++ b/silk-core/src/main/resources/reference.conf @@ -1,6 +1,28 @@ -# Set the elds.home variable. The ELDS_HOME environment variable is used if it has been defined. -elds.home = ${user.home}"/.silk" -elds.home = ${?ELDS_HOME} +################################################# +# Directories +################################################# + +directories = { + + # Base directory for all Silk related files. + base = ${user.home}"/.silk" + + # Directory for all configuration files. + config = ${directories.base}"/config" + + # Directory for all data. + data = ${directories.base}"/data" + + # Directory for all caches. + cache = ${directories.base}"/caches" + + # Directory for all logs. + logs = ${directories.base}"/logs" + +} + +# Temp file directory used e.g. 
for storing result files for async requests +config.tempFilesDirectory = ${directories.cache}"/tmp" ################################################# # Workspace Provider @@ -10,7 +32,7 @@ elds.home = ${?ELDS_HOME} workspace.provider.plugin = fileWorkspaceProvider workspace.provider.fileWorkspaceProvider = { # Directory to hold the workspace - dir = ${elds.home}"/workspace/" + dir = ${directories.data}"/workspace/" } ################################################# @@ -20,7 +42,7 @@ workspace.provider.fileWorkspaceProvider = { # Holds all resources in project specific directories. workspace.repository.plugin = projectFile workspace.repository.projectFile = { - dir = ${elds.home}"/workspace/" + dir = ${directories.data}"/workspace/" } # Setting the default internal dataset. diff --git a/silk-core/src/main/scala/org/silkframework/config/Config.scala b/silk-core/src/main/scala/org/silkframework/config/Config.scala index 65794104f9..75c9ed3291 100644 --- a/silk-core/src/main/scala/org/silkframework/config/Config.scala +++ b/silk-core/src/main/scala/org/silkframework/config/Config.scala @@ -1,12 +1,9 @@ package org.silkframework.config import com.typesafe.config.{ConfigException, ConfigFactory, Config => TypesafeConfig} -import org.silkframework.config.Config._ import org.silkframework.runtime.validation.ValidationException -import java.io.File import java.time.{Duration, Instant} -import java.util.logging.Logger import javax.inject.Named import scala.language.implicitConversions @@ -24,73 +21,43 @@ trait Config { def timestamp: Instant } -object Config{ - final val ELDS_HOME_ENV: String = "ELDS_HOME" - final val ELDS_HOME_CONF: String = "elds.home" - final val USER_HOME_CONF: String = "user.home" - final val DATAINTEGRATION_PATH: String = "/etc/dataintegration" - final val DATAINTEGRATION_CONFIG_DIR: String = DATAINTEGRATION_PATH + "/conf" - final val DATAINTEGRATION_CONF: String = "/conf/dataintegration.conf" - final val REFERENCE_CONF: String = 
"/conf/reference.conf" - final val APPLICATION_CONF: String = "/conf/application.conf" -} - @Named("default") class DefaultConfig private() extends Config { + // Overwrite default logging pattern for java.util.logging if (System.getProperty("java.util.logging.SimpleFormatter.format") == null) { System.setProperty("java.util.logging.SimpleFormatter.format", "%1$tb %1$td, %1$tY %1$tl:%1$tM:%1$tS %1$Tp %3$s%n%4$s: %5$s%6$s%n") } - private lazy val log = Logger.getLogger(this.getClass.getName) - private var config = this.synchronized {init()} private var currentTimestamp = Instant.now() - /** - * Will check and return if ELDS_HOME was defined either as environment variable or in the dataintegration config. - */ - lazy val eldsHomeDir: Option[File] ={ - ConfigFactory.invalidateCaches() - val conf = ConfigFactory.load() - Option(if(conf.hasPath(ELDS_HOME_CONF)) conf.getString(ELDS_HOME_CONF) else System.getenv(ELDS_HOME_ENV)) - .map(p => new File(p)) - } - private def init(): TypesafeConfig = { this.synchronized { ConfigFactory.invalidateCaches() - var fullConfig = ConfigFactory.load() - // Check if we are running as part of the eccenca Linked Data Suite - eldsHomeDir match { - case Some(eldsHome) => - val dataintegrationConfigPath = DATAINTEGRATION_PATH + DATAINTEGRATION_CONF - val configFile = new File(eldsHome, dataintegrationConfigPath) - if (!configFile.exists) { - val msg = new StringBuilder - msg ++= s"Configuration file not found at: ${configFile.getAbsolutePath}.\n" - msg ++= s"Falling back on default reference.conf file.\n" - msg ++= "Possible fix: Map a volume with the config file to this location.\n" - msg ++= "Otherwise set elds.home or $ELDS_HOME to point to the correct location." - log.warning(msg.toString()) - } - fullConfig = ConfigFactory.parseFile(configFile).withFallback(fullConfig) - case None => Logger.getLogger(this.getClass.getName).info( - "Variable $ELDS_HOME is not defined. 
If this application is not running in the ELDS context " + - "you can ignore this warning. Otherwise please configure $ELDS_HOME or elds.home." - ) - } - // Check if we are running as part of the Play Framework - val playConfig1 = new File(System.getProperty(USER_HOME_CONF) + REFERENCE_CONF) - val playConfig2 = new File(System.getProperty(USER_HOME_CONF) + APPLICATION_CONF) - if (playConfig1.exists()) { - fullConfig = fullConfig.withFallback(ConfigFactory.parseFile(playConfig1)) + // Check for external config file based on environment variables + val configPath = sys.env.get("DATAINTEGRATION_CONFIG") match { + case Some(configDir) => s"$configDir/dataintegration.conf" + case None => sys.env.get("CMEM_HOME") match { + case Some(cmemHome) => s"$cmemHome/dataintegration/config/dataintegration.conf" + case None => sys.props.get("user.home").map(home => s"$home/.cmem/dataintegration/config/dataintegration.conf").getOrElse("") + } } - if (playConfig2.exists()) { - fullConfig = fullConfig.withFallback(ConfigFactory.parseFile(playConfig2)) + + val configFile = new java.io.File(configPath) + val fullConfig = if (configFile.exists()) { + println(s"Loading external config from: $configPath") + // System properties take precedence over the external config, which in turn overrides the default configuration + val externalConfig = ConfigFactory.parseFile(configFile) + ConfigFactory.systemProperties() + .withFallback(externalConfig) + .withFallback(ConfigFactory.load()) + } else { + ConfigFactory.load() } + currentTimestamp = Instant.now() fullConfig.resolve() } diff --git a/silk-core/src/main/scala/org/silkframework/config/Directories.scala b/silk-core/src/main/scala/org/silkframework/config/Directories.scala new file mode 100644 index 0000000000..9827c6c951 --- /dev/null +++ b/silk-core/src/main/scala/org/silkframework/config/Directories.scala @@ -0,0 +1,36 @@ +package org.silkframework.config + +import java.io.File +import java.nio.file.Path + +/** + * Holds the paths to important directories used by Silk.
+ * + * @param config The directory where configuration files are stored. + * @param data The directory where project data is stored. + * @param cache The directory where cache files are stored. + * @param logs The directory where log files are stored. + */ +case class Directories(config: Path, + data: Path, + cache: Path, + logs: Path) + +object Directories { + + private val config: Directories = { + val dirConfig = DefaultConfig.instance().getConfig("directories") + Directories( + config = new File(dirConfig.getString("config")).toPath, + data = new File(dirConfig.getString("data")).toPath, + cache = new File(dirConfig.getString("cache")).toPath, + logs = new File(dirConfig.getString("logs")).toPath + ) + } + + /** + * Returns the configured directories. + */ + def apply(): Directories = config + +} diff --git a/silk-core/src/main/scala/org/silkframework/util/FileUtils.scala b/silk-core/src/main/scala/org/silkframework/util/FileUtils.scala index c30fecb8ad..af346d124d 100644 --- a/silk-core/src/main/scala/org/silkframework/util/FileUtils.scala +++ b/silk-core/src/main/scala/org/silkframework/util/FileUtils.scala @@ -17,7 +17,7 @@ package org.silkframework.util import org.silkframework.config.DefaultConfig import java.io.{File, IOException} -import java.nio.file.Files +import java.nio.file.{Files, Paths} import scala.language.implicitConversions import scala.util.Try @@ -35,11 +35,16 @@ object FileUtils { val tmpDir = Files.createTempDirectory("silk-tmp-file-dir") tmpDir.toString } - if(cfg.hasPath(tmpDirKey)) { + val dir = if(cfg.hasPath(tmpDirKey)) { Try(cfg.getString(tmpDirKey)).getOrElse(default) } else { default } + // Ensure the directory exists (needed when accessed outside ApplicationValidationModule, e.g. 
standalone Silk) + Files.createDirectories(Paths.get(dir)) + // Redirect all standard Java temp file creation to the configured directory + System.setProperty("java.io.tmpdir", dir) + dir } } diff --git a/silk-rules/src/main/scala/org/silkframework/rule/RuntimeLinkingConfig.scala b/silk-rules/src/main/scala/org/silkframework/rule/RuntimeLinkingConfig.scala index 77d9c0316f..d206c40719 100644 --- a/silk-rules/src/main/scala/org/silkframework/rule/RuntimeLinkingConfig.scala +++ b/silk-rules/src/main/scala/org/silkframework/rule/RuntimeLinkingConfig.scala @@ -31,7 +31,6 @@ import org.silkframework.rule.execution.ExecutionMethod * @param partitionSize The maximum size of the entity partitions in the cache. * @param numThreads The number of concurrent threads used for matching. * @param generateLinksWithEntities Generate links with the entities they connect. - * @param homeDir The directory used by Silk to store persistent information such as caches. * @param sampleSizeOpt Load all entities if set to None, else only load a random sample of max. the configured size * from each data source to be linked. 
* @param linkLimit If defined, the execution will stop after the configured number of links is reached and will return @@ -48,7 +47,6 @@ case class RuntimeLinkingConfig(executionMethod: ExecutionMethod = ExecutionMeth partitionSize: Int = 1000, numThreads: Int = Runtime.getRuntime.availableProcessors(), generateLinksWithEntities: Boolean = false, - homeDir: String = System.getProperty("user.home") + "/.silk/", logLevel: Level = Level.INFO, sampleSizeOpt: Option[Int] = None, linkLimit: Option[Int] = None, diff --git a/silk-rules/src/main/scala/org/silkframework/rule/execution/GenerateLinks.scala b/silk-rules/src/main/scala/org/silkframework/rule/execution/GenerateLinks.scala index 02191fccec..c9f0106dba 100644 --- a/silk-rules/src/main/scala/org/silkframework/rule/execution/GenerateLinks.scala +++ b/silk-rules/src/main/scala/org/silkframework/rule/execution/GenerateLinks.scala @@ -15,7 +15,7 @@ package org.silkframework.rule.execution import org.silkframework.cache.{EntityCache, FileEntityCache, MemoryEntityCache} -import org.silkframework.config.{Prefixes, Task} +import org.silkframework.config.{Directories, Prefixes, Task} import org.silkframework.dataset.{DataSource, LinkSink} import org.silkframework.entity.{Entity, EntitySchema, Link} import org.silkframework.execution.ExecutionReport @@ -193,11 +193,11 @@ class GenerateLinks(task: Task[LinkSpec], val sourceSchema = comparisonToRestrictionConverter.extendEntitySchemaWithLinkageRuleRestriction(entityDescs.source, rule, sourceOrTarget = true) val targetSchema = comparisonToRestrictionConverter.extendEntitySchemaWithLinkageRuleRestriction(entityDescs.target, rule, sourceOrTarget = false) if (runtimeConfig.useFileCache) { - val cacheDir = new File(runtimeConfig.homeDir + "/entityCache/" + task.id + UUID.randomUUID().toString) - + val cacheDir = Directories().cache.resolve("linkingCaches") + val fileName = task.id + "_" + UUID.randomUUID().toString DPair( - source = new FileEntityCache(sourceSchema, 
sourceIndexFunction, cacheDir + "_source/", runtimeConfig), - target = new FileEntityCache(targetSchema, targetIndexFunction, cacheDir + "_target/", runtimeConfig) + source = new FileEntityCache(sourceSchema, sourceIndexFunction, cacheDir.resolve(fileName + "_source").toFile, runtimeConfig), + target = new FileEntityCache(targetSchema, targetIndexFunction, cacheDir.resolve(fileName + "_target").toFile, runtimeConfig) ) } else { DPair( diff --git a/silk-workbench/conf/application.conf b/silk-workbench/conf/application.conf index e9192f0592..26196d7926 100644 --- a/silk-workbench/conf/application.conf +++ b/silk-workbench/conf/application.conf @@ -97,7 +97,4 @@ cache-updater { } } -# Temp file directory used e.g. for storing result files for async requests -config.tempFilesDirectory = ${silk.home}"/tmp" - play.modules.enabled += "cacheUpdater.CacheUpdaterModule" \ No newline at end of file diff --git a/silk-workbench/silk-workbench-core/app/config/WorkbenchConfig.scala b/silk-workbench/silk-workbench-core/app/config/WorkbenchConfig.scala index d74d359ebe..6e73c41639 100644 --- a/silk-workbench/silk-workbench-core/app/config/WorkbenchConfig.scala +++ b/silk-workbench/silk-workbench-core/app/config/WorkbenchConfig.scala @@ -2,7 +2,7 @@ package config import com.typesafe.config.{Config => TypesafeConfig} import config.WorkbenchConfig.Tabs -import org.silkframework.config.{Config, DefaultConfig} +import org.silkframework.config.{Config, DefaultConfig, Directories} import org.silkframework.runtime.metrics.MeterRegistryProvider import org.silkframework.runtime.metrics.MetricsConfig.prefix import org.silkframework.runtime.resource._ @@ -207,7 +207,7 @@ object WorkbenchConfig { */ lazy val get = { val config = Configuration(DefaultConfig.instance()) - val resourceLoader = getResourceLoader + val resourceLoader = createConfigResourceLoader WorkbenchConfig( title = config.getOptional[String]("workbench.title").getOrElse("Silk Workbench"), @@ -250,16 +250,13 @@ object 
WorkbenchConfig { } } - def getResourceLoader: ResourceLoader = { - DefaultConfig.instance.eldsHomeDir match { - case None => - // If no eLDs home directory is set, use the classpath resource loader only. - ClasspathResourceLoader("") - case Some(eldsHome) => - // If an eLDs home directory is set, use the file resource manager for the config directory. - val configDir = new File(eldsHome, Config.DATAINTEGRATION_CONFIG_DIR) - FallbackResourceManager(ReadOnlyResourceManager(ClasspathResourceLoader("")), FileResourceManager(configDir), writeIntoFallbackLoader = false) - } + /** + * The resource loader for config resources, including logos and welcome messages. + */ + def createConfigResourceLoader: ResourceLoader = { + // First search in the configuration directory and then in the classpath + val configDir = Directories().config.toFile + FallbackResourceManager(FileResourceManager(configDir), ReadOnlyResourceManager(ClasspathResourceLoader("")), writeIntoFallbackLoader = false) } /** diff --git a/silk-workbench/silk-workbench-workspace/app/controllers/workspace/WorkspaceController.scala b/silk-workbench/silk-workbench-workspace/app/controllers/workspace/WorkspaceController.scala index aeb825febb..e455deef7c 100644 --- a/silk-workbench/silk-workbench-workspace/app/controllers/workspace/WorkspaceController.scala +++ b/silk-workbench/silk-workbench-workspace/app/controllers/workspace/WorkspaceController.scala @@ -70,7 +70,7 @@ class WorkspaceController @Inject() (implicit workspaceReact: WorkspaceReact) ex // Load example and write it to a temporary file val exampleFile = Files.createTempFile("example", ".zip") - val inputStream = WorkbenchConfig.getResourceLoader.get("example.zip").inputStream + val inputStream = WorkbenchConfig.createConfigResourceLoader.get("example.zip").inputStream try { Files.copy(inputStream, exampleFile, StandardCopyOption.REPLACE_EXISTING)