diff --git a/build.sbt b/build.sbt index 4b5996d..79794ba 100644 --- a/build.sbt +++ b/build.sbt @@ -25,6 +25,7 @@ libraryDependencies ++= Seq( "de.djini" %% "scutil-core" % "0.151.0" % "compile", "de.djini" %% "scutil-swing" % "0.151.0" % "compile", "de.djini" %% "scjson-codec" % "0.169.0" % "compile", + "org.scala-lang.modules" %% "scala-xml" % "1.1.1" % "compile", "org.apache.sanselan" % "sanselan" % "0.97-incubator" % "compile", "org.simplericity.macify" % "macify" % "1.6" % "compile", "org.apache.httpcomponents" % "httpclient" % "4.5.6" % "compile", diff --git a/src/main/resources/gallery_wikimedia_commons.bpp b/src/main/resources/gallery_wikimedia_commons.bpp index 1ac34f2..d1778a6 100644 --- a/src/main/resources/gallery_wikimedia_commons.bpp +++ b/src/main/resources/gallery_wikimedia_commons.bpp @@ -13,7 +13,7 @@ # if (!batch.uploads.empty) { # for (var upload : batch.uploads) { -# if (!upload.description.startsWith("{{Information")) { +# if (!upload.description.startsWith("{{Information") && !upload.description.startsWith("{{Artwork")) { $(upload.title)|$(upload.description.replaceAll("\n", " ")) # } else { $(upload.title) diff --git a/src/main/resources/image_wikimedia_commons.bpp b/src/main/resources/image_wikimedia_commons.bpp index 7a52b8c..035c0b2 100644 --- a/src/main/resources/image_wikimedia_commons.bpp +++ b/src/main/resources/image_wikimedia_commons.bpp @@ -3,7 +3,7 @@ #// in: common:Common, upload:Upload # == {{int:filedesc}} == -# if (!upload.description.startsWith("{{Information")) { +# if (!upload.description.startsWith("{{Information") && !upload.description.startsWith("{{Artwork")) { {{Information |Description=$(common.description) $(upload.description) diff --git a/src/main/resources/oaipmh.properties b/src/main/resources/oaipmh.properties new file mode 100644 index 0000000..fd63b96 --- /dev/null +++ b/src/main/resources/oaipmh.properties @@ -0,0 +1,15 @@ +# encoding: ISO-8859-1 with UTF-8 escapes +# This file configures the 
OAI-PMH metadata parsing. See OaiPmh2.scala + +# Marker string used to find the beginning of a history section in the tags +historyMarker=Entry procedures: +# GLAM institution +institution=City Archives of Springfield +# Filename prefix stripped to obtain the GLAM reference from the filename +prefix=FOO1234_ +# Name of the fonds being imported +fonds=John Smith Fonds +# Description language +lang=en +# You can define any property to replace an artist name (spaces written as underscores) with a MediaWiki template +SMITH,_John_(10/10/1875-10/10/1931)._Author={{Creator:John Smith}} diff --git a/src/main/scala/commonist/CommonistMain.scala b/src/main/scala/commonist/CommonistMain.scala index 61e7cbf..6bf892d 100644 --- a/src/main/scala/commonist/CommonistMain.scala +++ b/src/main/scala/commonist/CommonistMain.scala @@ -148,7 +148,7 @@ object CommonistMain extends Logging { * load and display imageUIs for all files in the new directory */ private def doChangeDirectory(directory:File) { - changeDirectory change new ChangeDirectoryTask(mainWindow, imageListUI, statusUI, thumbnails, directory) + changeDirectory change new ChangeDirectoryTask(mainWindow, imageListUI, statusUI, thumbnails, directory, loader) } /** Action: start uploading selected files */ diff --git a/src/main/scala/commonist/task/ChangeDirectoryTask.scala b/src/main/scala/commonist/task/ChangeDirectoryTask.scala index 0354e01..91822cd 100644 --- a/src/main/scala/commonist/task/ChangeDirectoryTask.scala +++ b/src/main/scala/commonist/task/ChangeDirectoryTask.scala @@ -2,19 +2,30 @@ package commonist.task import java.io.File +import scala.language.postfixOps +import scala.xml._ + +import scutil.base.implicits._ import scutil.core.implicits._ +import scutil.io._ import scutil.log._ import commonist._ import commonist.thumb._ import commonist.ui._ import commonist.ui.later._ +import commonist.util._ /** change the directory displayed in the ImageListUI */ -final class ChangeDirectoryTask(mainWindow:MainWindow, imageListUI:ImageListUI, statusUI:StatusUI, 
thumbnails:Thumbnails, directory:File) extends Task {
+final class ChangeDirectoryTask(mainWindow:MainWindow, imageListUI:ImageListUI, statusUI:StatusUI, thumbnails:Thumbnails, directory:File, loader:Loader) extends Task {
 	private val imageListUILater = new ImageListUILater(imageListUI)
 	private val statusUILater = new StatusUILater(statusUI)
 
+	private def getOaiPmhProps():Map[String,String] = {
+		val propsURL = loader resourceURL "oaipmh.properties" getOrError "cannot load oaipmh.properties"
+		PropertiesUtil loadURL (propsURL, None)
+	}
+
 	override protected def execute() {
 		DEBUG("clear")
 
@@ -31,11 +42,21 @@ final class ChangeDirectoryTask(mainWindow:MainWindow, imageListUI:ImageListUI,
 		val (readable,unreadable) = sorted partition { _.canRead }
 		unreadable foreach { it => WARN("cannot read", it) }
 
-		val max = readable.length
+		val (xmls,images) = readable partition { f => f.getName() endsWith ".xml" }
+		// an XML file that cannot be parsed is reported and skipped instead of aborting the listing
+		val oaipmhDocs = xmls.toVector flatMap { f =>
+			try Vector(XML loadFile f)
+			catch { case scala.util.control.NonFatal(e) => WARN("cannot parse " + f, e); Vector.empty }
+		} filter { "OAI-PMH" == _.label }
+		// the properties are read once for all documents, and only when there is a document
+		lazy val oaipmhProps = getOaiPmhProps()
+		val oaipmh = oaipmhDocs map { new OaiPmh2(_, oaipmhProps) }
+
+		val max = images.length
 		var cur = 0
 		var last = 0L
 		try {
-			for (file <- readable) {
+			for (file <- images) {
 				check()
 
 				statusUILater determinate ("imageList.loading", cur, max, file.getPath, int2Integer(cur), int2Integer(max))
@@ -44,7 +65,7 @@ final class ChangeDirectoryTask(mainWindow:MainWindow, imageListUI:ImageListUI,
 				// using Thread.interrupt while this is running kills the EDT??
 				val thumbnail = thumbnails thumbnail file
 				val thumbnailMaxSize = thumbnails.getMaxSize
-				imageListUILater add (file, thumbnail, thumbnailMaxSize)
+				imageListUILater add (file, oaipmh, thumbnail, thumbnailMaxSize)
 
 				try { Thread.sleep(100) }
 				catch { case e:InterruptedException => WARN("interrupted", e) }
diff --git a/src/main/scala/commonist/ui/ImageListUI.scala b/src/main/scala/commonist/ui/ImageListUI.scala
index 993a834..566ef6e 100644
--- a/src/main/scala/commonist/ui/ImageListUI.scala
+++ b/src/main/scala/commonist/ui/ImageListUI.scala
@@ -77,9 +77,9 @@ final class ImageListUI(programHeading:String, programIcon:Image) extends JPanel
 	}
 
 	/** adds a File UI */
-	def add(file:File, icon:Option[Icon], thumbnailMaxSize:Int) {
+	def add(file:File, oaipmh:Vector[OaiPmh2], icon:Option[Icon], thumbnailMaxSize:Int) {
 		val imageUI =
-				new ImageUI(file, icon, thumbnailMaxSize, programHeading, programIcon, new ImageUICallback {
+				new ImageUI(file, oaipmh, icon, thumbnailMaxSize, programHeading, programIcon, new ImageUICallback {
 					def updateSelectStatus() { outer.updateSelectStatus() }
 				})
 
diff --git a/src/main/scala/commonist/ui/ImageUI.scala b/src/main/scala/commonist/ui/ImageUI.scala
index dcbedd0..367d462 100644
--- a/src/main/scala/commonist/ui/ImageUI.scala
+++ b/src/main/scala/commonist/ui/ImageUI.scala
@@ -22,7 +22,7 @@ trait ImageUICallback {
 }
 
 /** a data editor with a thumbnail preview for an image File */
-final class ImageUI(file:File, icon:Option[Icon], thumbnailMaxSize:Int, programHeading:String, programIcon:Image, callback:ImageUICallback) extends JPanel {
+final class ImageUI(file:File, oaipmh:Vector[OaiPmh2], icon:Option[Icon], thumbnailMaxSize:Int, programHeading:String, programIcon:Image, callback:ImageUICallback) extends JPanel {
 	private val thumbDimension = new Dimension(thumbnailMaxSize, thumbnailMaxSize)
 
 	private var uploadSuccessful:Option[Boolean] = None
@@ -155,20 +155,35 @@ final class ImageUI(file:File, icon:Option[Icon], thumbnailMaxSize:Int, programH
 
 	// BETTER move unparsers and parsers together
 
-	private val exif = EXIF extract file
-	private val exifDate = exif.date cata ("", _ format "yyyy-MM-dd HH:mm:ss")
-	private val exifGPS = exif.gps cata ("", it => it.latitude.toString + "," + it.longitude.toString)
-	private val exifHeading = exif.heading cata ("", _.toString)
-	private val exifDesc = exif.description getOrElse ""
-	private val fixedName = Filename fix file.getName
-
-	uploadEditor setSelected false
-	nameEditor setText fixedName
-	descriptionEditor setText exifDesc
-	dateEditor setText exifDate
-	coordinatesEditor setText exifGPS
-	headingEditor setText exifHeading
-	categoriesEditor setText ""
+	private def getExif:ImageData = {
+		val exif = EXIF extract file
+		ImageData(
+			file,
+			false,
+			Filename fix file.getName,
+			exif.description getOrElse "",
+			exif.date cata ("", _ format "yyyy-MM-dd HH:mm:ss"),
+			exif.gps cata ("", it => it.latitude.toString + "," + it.longitude.toString),
+			exif.heading cata ("", _.toString),
+			""
+		)
+	}
+
+	def loadImageData(data:ImageData) {
+		uploadEditor setSelected data.upload
+		nameEditor setText data.name
+		descriptionEditor setText data.description
+		dateEditor setText data.date
+		coordinatesEditor setText data.coordinates
+		headingEditor setText data.heading
+		categoriesEditor setText data.categories
+	}
+
+	private val exifData:ImageData = getExif
+	// initialize from EXIF data, if any
+	loadImageData(exifData)
+	// then apply every OAI-PMH record found for this file, in document order (the last one wins)
+	oaipmh.flatMap(_.getImageData(exifData)).foreach(loadImageData)
 
 	// BETTER could be a trait
 	override def getMaximumSize():Dimension =
diff --git a/src/main/scala/commonist/ui/later/ImageListUILater.scala b/src/main/scala/commonist/ui/later/ImageListUILater.scala
index 541af58..bd51f68 100644
--- a/src/main/scala/commonist/ui/later/ImageListUILater.scala
+++ b/src/main/scala/commonist/ui/later/ImageListUILater.scala
@@ -7,6 +7,7 @@ import javax.swing.Icon
 
 import scutil.gui.SwingUtil._
 import 
commonist.ui.ImageListUI
+import commonist.util.OaiPmh2
 
 /** wraps a ImageListUI's methods in SwingUtilities.invokeAndWait */
 final class ImageListUILater(ui:ImageListUI) {
@@ -16,9 +17,9 @@ final class ImageListUILater(ui:ImageListUI) {
 		}
 	}
 
-	def add(file:File, thumbnail:Option[Icon], thumbnailMaxSize:Int) {
+	def add(file:File, oaipmh:Vector[OaiPmh2], thumbnail:Option[Icon], thumbnailMaxSize:Int) {
 		edtWait {
-			ui add (file, thumbnail, thumbnailMaxSize)
+			ui add (file, oaipmh, thumbnail, thumbnailMaxSize)
 		}
 	}
 
diff --git a/src/main/scala/commonist/util/OaiPmh2.scala b/src/main/scala/commonist/util/OaiPmh2.scala
new file mode 100644
index 0000000..8144157
--- /dev/null
+++ b/src/main/scala/commonist/util/OaiPmh2.scala
@@ -0,0 +1,139 @@
+package commonist.util
+
+import commonist.data._
+
+import scala.xml._
+
+import scutil.log._
+
+/**
+ * Parse and extract image metadata from OAI-PMH 2.0 files.
+ * See www.openarchives.org/OAI/openarchivesprotocol.html
+ *
+ * @param doc   root element of a parsed OAI-PMH document
+ * @param props configuration; historyMarker, institution, prefix, fonds and lang are
+ *              looked up while rendering a record, other keys map creators to artist values
+ */
+class OaiPmh2(doc:Elem, props:Map[String,String]) extends Logging {
+
+	/** Extract text from node and fix badly formatted XML strings (escaped twice) */
+	private def text(e:NodeSeq):String =
+		e.map(_.text).mkString("\n").trim()
+			.replace("&amp;", "&")
+			.replace("&quot;", "\"")
+			.replace("&apos;", "'")
+			.replace("&lt;", "<")
+			.replace("&gt;", ">")
+
+	/**
+	 * Formats name as follows: "filename_without_ext - title.ext"
+	 * Makes sure we don't have two consecutive dots if title ends by a dot
+	 */
+	private def formatName(name:String, title:String):String = {
+		val dot = name.lastIndexOf('.')
+		// a name without any dot has no extension; keep it whole rather than fail in substring(-1)
+		val (base, ext) = if (dot >= 0) name.splitAt(dot) else (name, "")
+		// the extension brings its own dot, so trailing dots of the title are dropped before it
+		val shown = if (ext.nonEmpty) title.replaceFirst("[.]+$", "") else title
+		base + " - " + shown + ext
+	}
+
+	/** Attempt to get a nicer artist value from properties, otherwise return raw creator */
+	private def artist(creator:String):String =
+		props.getOrElse(creator.replace(' ', '_'), creator)
+
+	/** Detect public domain mention in various languages, otherwise return raw rights */
+	private def permission(rights:String):String = {
+		// Locale.ROOT keeps the comparison independent of the JVM default locale
+		val lowercase = rights.toLowerCase(java.util.Locale.ROOT)
+		val mentions = Seq("public domain", "domaine public", "gemeinfreiheit", "dominio público")
+		if (mentions exists { lowercase contains _ }) "{{PD-old|PD-70}}"
+		else rights
+	}
+
+	/** Detect photographs from format, otherwise return raw format */
+	private def medium(format:String):String = {
+		val lowercase = format.toLowerCase(java.util.Locale.ROOT)
+		if (lowercase.contains("photo") || lowercase.contains("foto")) "{{Technique|photograph}}"
+		else format
+	}
+
+	/** Detect size in centimeters, otherwise return an empty string */
+	private def dimensions(format:String):String =
+		".*; (\\d+) x (\\d+) cm ;.*".r
+			.findFirstMatchIn(format)
+			.map { m => "{{Size|unit=cm|height=" + m.group(1) + "|width=" + m.group(2) + "}}" }
+			.getOrElse("")
+
+	/** localize given string according to language defined in properties */
+	private def localized(text:String):String =
+		if (text.nonEmpty) "{{" + props("lang") + "|" + text + "}}"
+		else text
+
+	/** Artwork description */
+	private def artwork(dc:Node, filenameWithOutExt:String):String = {
+		val historyMarker = props("historyMarker")
+		val institution = props("institution")
+		val prefix = props("prefix")
+		val fonds = props("fonds")
+
+		// the prefix is plain text, not a regex, and is only removed from the start of the name
+		val id = filenameWithOutExt.stripPrefix(prefix)
+		val fullDescription = text(dc \ "description")
+		// text before the marker is the description, text after it is the object history
+		val historyIndex = fullDescription.indexOf(historyMarker)
+		val (description, objectHistory) =
+			if (historyIndex >= 0) (
+				fullDescription.substring(0, historyIndex).trim(),
+				fullDescription.substring(historyIndex + historyMarker.length).trim()
+			)
+			else (fullDescription, "")
+		val format = text(dc \ "format").replace("image/jpeg", "").trim()
+
+		Seq(
+			"{{Artwork",
+			"|ID={{" + institution + " - FET link|" + id + "}}",
+			"|artist=" + artist(text(dc \ "creator")),
+			"|credit line=",
+			"|date=" + text(dc \ "date"),
+			"|location=",
+			"|description=" + localized(description),
+			"|dimensions=" + dimensions(format),
+			"|gallery={{Institution:" + institution + "}}",
+			"|medium=" + medium(format),
+			"|object history=" + localized(objectHistory),
+			"|permission=" + permission(text(dc \ "rights")),
+			"|references=",
+			"|source={{" + fonds + " - " + institution + "}}",
+			"|title=" + localized(text(dc \ "title")),
+			"}}"
+		).mkString("\n")
+	}
+
+	/** Fills image metadata if found via its filename */
+	def getImageData(data:ImageData):Option[ImageData] = {
+		val filenameWithOutExt = data.file.getName().replaceFirst("[.][^.]+$", "")
+		// keep every matching record, not just the first, so that duplicates can be reported
+		val records = (doc \\ "dc") filter { dc => text(dc \ "relation") contains (filenameWithOutExt + ".") }
+		records match {
+			case Seq(dc) =>
+				Some(ImageData(
+					data.file,
+					data.upload,
+					// "filename - title.ext"
+					formatName(data.name, text(dc \ "title")),
+					// {{Artwork}} description
+					artwork(dc, filenameWithOutExt),
+					text(dc \ "date"),
+					data.coordinates,
+					data.heading,
+					data.categories
+				))
+			case Seq() =>
+				None
+			case _ =>
+				WARN("Found several records for " + filenameWithOutExt)
+				None
+		}
+	}
+}