DDMAL · SCN-MNG · Jan 9, 2026 · Jan 16, 2026 · Jan 16, 2026 · Jan 16, 2026
diff --git a/shared/rdf_config/wjazzd.toml b/shared/rdf_config/wjazzd.toml
@@ -0,0 +1,73 @@
+[general]
+name = "wjazzd"
+csv_folder = "../../wjazzd/data/reconciled"  # reconciled CSV input; NOTE(review): presumably resolved relative to this file's directory — confirm
+rdf_output_folder = "../../wjazzd/data/rdf"  # NOTE(review): same relative-path assumption as csv_folder — confirm
+test_mode = false
+
+[namespaces]
+rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+rdfs = "http://www.w3.org/2000/01/rdf-schema#"
+xsd = "http://www.w3.org/2001/XMLSchema#"
+wd = "http://www.wikidata.org/entity/"
+wdt = "http://www.wikidata.org/prop/direct/"
+mb = "https://musicbrainz.org/recording/"  # NOTE(review): not referenced by any table in this file
+jr = "http://mir.audiolabs.uni-erlangen.de/jazztube/wjazzd/records/"
+jc = "http://mir.audiolabs.uni-erlangen.de/jazztube/wjazzd/compositions/"
+js = "http://mir.audiolabs.uni-erlangen.de/jazztube/wjazzd/solos/solo/"
+jt = "http://mir.audiolabs.uni-erlangen.de/jazztube/wjazzd/tracks/"
+jtr = "http://mir.audiolabs.uni-erlangen.de/jazztube/wjazzd/transcriptions/"  # NOTE(review): not referenced by any table in this file
+lmwj = "https://linkedmusic.ca/graphs/wjazzd/"
+
+[composition_info]
+PRIMARY_KEY = "compid"
+compid = {type = "lmwj:Composition", prefix = "jc"}
+title = "rdfs:label"
+composer = "P86"  # composer (P86)
+form = ""  # no predicate assigned; presumably left out of the RDF output — confirm
+template = "P144"  # based on (P144)
+tonalitytype = ""  # no predicate assigned; presumably left out of the RDF output — confirm
+genre = "P136"  # genre (P136)
+
+[record_info]
+PRIMARY_KEY = "recordid"
+recordid = {type = "lmwj:Record", prefix = "jr"}
+artist = "P175"  # performer (P175)
+recordtitle = "rdfs:label"
+label = "P264"  # record label (P264)
+recordbib = ""  # no predicate assigned; presumably left out of the RDF output — confirm
+mbzid = ""  # no predicate assigned here, whereas track_info maps its mbzid to P4404
+
+[record_info.releasedate]
+pred = "P577"  # publication date (P577)
+datatype = "xsd:gYear"  # typed as a year, unlike track_info.recordingdate which uses xsd:date
+
+[solo_info]
+PRIMARY_KEY = "melid"
+melid = {type = "lmwj:Solo", prefix = "js"}
+trackid = {pred = "P361", prefix = "jt"}  # part of (P361)
+compid = {pred = "P2550", prefix = "jc"}  # recording or performance of (P2550)
+recordid = {prefix = "jr"}  # NOTE(review): no pred given, unlike trackid/compid — confirm this is intentional
+performer = "P175"  # performer (P175)
+title = "rdfs:label"
+titleaddon = ""
+solopart = ""
+instrument = "P870"  # instrumentation (P870)
+style = "P136"  # genre (P136); style values are reconciled as "<style> jazz"
+avgtempo = "P1725"  # beats per minute (P1725)
+tempoclass = ""
+rhythmfeel = ""
+key = "P826"  # tonality (P826)
+signature = "P3440"  # time signature (P3440)
+chorus_count = ""
+
+[track_info]
+PRIMARY_KEY = "trackid"
+trackid = {type = "lmwj:Track", prefix = "jt"}
+filename_track = "rdfs:label"
+recordid = {pred = "P361", prefix = "jr"}  # part of (P361)
+lineup = "P175"  # performer (P175)
+instrument = "P870"  # instrumentation (P870)
+mbzid = "P4404"  # MusicBrainz recording ID (P4404)
+trackno = ""  # no predicate assigned; presumably left out of the RDF output — confirm
+recordingdate = {pred = "P10135", datatype = "xsd:date"}  # recording date (P10135)
+compid = {pred = "P2550", prefix = "jc"}  # recording or performance of (P2550)
diff --git a/shared/rdf_conversion/using_rdfconv_script.md b/shared/rdf_conversion/using_rdfconv_script.md
@@ -60,13 +60,13 @@ The steps below must be completed before running the general RDF conversion scri
 - Run the following command to create a configuration file
 
 ```bash
-python -m rdfconv.tomlgen <path to csv folder> --output <config output path>
+python -m rdfconv.tomlgen --input <path to csv folder> --output <config output path>
 ```
 
 In the case of The Session, the command looks like:
 
 ```bash
-python -m rdfconv.tomlgen ../thesession/data/reconciled --output rdf_config/thesession.toml
+python -m rdfconv.tomlgen --input ../thesession/data/reconciled --output rdf_config/thesession.toml
 ```
 
 - A new TOML configuration will be created at your select output path.

diff --git a/wjazzd/.gitignore b/wjazzd/.gitignore
@@ -0,0 +1 @@
+data
diff --git a/wjazzd/README.md b/wjazzd/README.md
@@ -0,0 +1,48 @@
+# About Weimar Jazz Database
+The Weimar Jazz Database was created as part of the Jazzomat project. Much of the information can be found on the [official website](https://jazzomat.hfm-weimar.de/dbformat/dboverview.html). 
+
+For an even more in-depth dive into the project, you can read chapters of the book *Inside the Jazzomat: New Perspectives for Jazz Research* ([PDF](https://d1wqtxts1xzle7.cloudfront.net/55243585/inside_the_jazzomat_final_rev_oa4-libre.pdf?1512809734=&response-content-disposition=inline%3B+filename%3DInside_the_Jazzomat_New_Perspectives_for.pdf&Expires=1767825454&Signature=GuXygFuslUrc9TcEqJTsp-NZWtGMvTtDvm8-4uvCqWHFW5Fd2OXsNfHIwj6Y1PN4wGxoWO2ielG8fTfp2ZX9viXent09q7LTbipArwkMq0J~U6nfwg8DNakUtaG5i902N5Mc3Pq5jpjOFjFCt5yKVvOZxj0QV2Nap1c84YcV3aj1kZ7WPJY4iKRcGZwasLaWUqn0WJIEj3fne0DfZ5G~ygytq3ySiyJhH726cwSO4yRuocTuq80BXfMH1xoc6ZqzOcamy2~xwr3EOQw0oWt0ytvq7yr6J2hNBNhYRGmLT7ggOcPVZIrE0D5B3CStzZgA~dMWcBrWGva22c4Dz4WNaA__&Key-Pair-Id=APKAJLOHF5GGSLRBV4ZA)), starting on page 19.
+
+The official abbreviation of `Weimar Jazz Database` is `WjazzD` (occasionally also referred to as `WJD`). We use the lowercase string `wjazzd` as our naming convention.
+
+[Jazztube](http://mir.audiolabs.uni-erlangen.de/jazztube/) is a related project by the team behind the Weimar Jazz Database, aiming to help visualize `WjazzD`. Since [Jazztube](http://mir.audiolabs.uni-erlangen.de/jazztube/) webpages are more informative, our URI will point to `http://mir.audiolabs.uni-erlangen.de/jazztube/` instead of `https://jazzomat.hfm-weimar.de/`. 
+
+# How to Obtain The Database
+The database can be downloaded at the [official download page](https://jazzomat.hfm-weimar.de/download/download.html) in the form of a SQLite3 database.
+
+# Ingestion Workflow
+- Change Directory to Repository Root
+- Obtain a copy of the Weimar Jazz Database SQLite file and store it at the path `/wjazzd/data/sql/wjazzd.db`
+- Install `sqlite3` if not done already
+```bash
+sudo apt install sqlite3  # Or 'brew install sqlite' on macOS
+```
+- Export all tables of SQLite file to CSV
+```bash
+mkdir -p ./wjazzd/data/raw && \
+for t in $(sqlite3 ./wjazzd/data/sql/wjazzd.db ".tables"); do  # one pass per table name
+  echo "Exporting $t"
+  sqlite3 -header -csv ./wjazzd/data/sql/wjazzd.db "SELECT * FROM $t;" \
+  > ./wjazzd/data/raw/$t.csv  # one CSV (with header row) per table
+done
+```
+- Copy relevant CSV to a separate `data/processed` folder (some CSVs, like `melody.csv`, are not worth being converted to Linked Data form) 
+```bash
+mkdir -p wjazzd/data/processed && cp wjazzd/data/raw/{composition_info.csv,record_info.csv,solo_info.csv,track_info.csv,transcription_info.csv} wjazzd/data/processed/
+```
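+- Reconcile the processed CSVs using OpenRefine (refer to the [reconciliation guideline](./doc/reconciliation_procedures.md)), then export the reconciled CSVs to `wjazzd/data/reconciled`, the `csv_folder` that `shared/rdf_config/wjazzd.toml` reads from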
+- After reconciliation, review `shared/rdf_config/wjazzd.toml` to make sure that it matches your reconciled CSV. For more information on how the General RDF Conversion script works, please consult [its documentation](../shared/rdf_conversion/using_rdfconv_script.md)
+- After having reviewed the TOML file, run the general rdf conversion script using the following command from the `/shared` directory:
+```bash
+python -m rdfconv.convert rdf_config/wjazzd.toml
+```
+
+# Content of the Database
+The [official database homepage](https://jazzomat.hfm-weimar.de/dbformat/dboverview.html) provides a comprehensive overview of each table and field in the database. Below is a quick overview of the entities that are ingested into the LinkedMusic Datalake.
+
+## Ingested Entity Types
+- solo: a section in a recorded song where a musician is soloing. A solo is part of a song
+- track: a song. A track is part of a record (i.e. album) and contains one or more solos
+- record: an album. A record contains tracks.
+- composition: the jazz composition underlying a solo or a track. Both a solo and the track containing it are linked to the composition.
+
diff --git a/wjazzd/doc/reconciliation_procedures.md b/wjazzd/doc/reconciliation_procedures.md
@@ -0,0 +1,207 @@
+# Weimar Jazz Database OpenRefine Reconciliation Procedures
+
+This guide covers the steps to take to clean and reconcile the Weimar Jazz Database in OpenRefine. More specifically, the following CSV files will be reconciled before being converted to RDF:
+
+- `solo_info.csv`
+- `composition_info.csv`
+- `record_info.csv`
+- `track_info.csv`
+
+The JSON files located in `wjazzd/openrefine` can be used to automatically apply the procedures detailed below.
+
+## solo_info.csv
+
+- The performer column must be reconciled to the type `human (Q5)`. Some musicians need to be manually selected amongst matching people with similar names
+- The `instrument` column must be expanded using the dictionary below, before being reconciled:
+
+```json
+{
+  "as": "Alto Saxophone",
+  "bcl": "Bass Clarinet",
+  "bs": "Baritone Saxophone",
+  "cl": "Clarinet",
+  "cor": "Cornet",
+  "g": "Guitar",
+  "p": "Piano",
+  "ss": "Soprano Saxophone",
+  "tb": "Trombone",
+  "tp": "Trumpet",
+  "ts": "Tenor Saxophone",
+  "ts-c": "C Melody Saxophone",
+  "vib": "Vibraphone"
+}
+```
+
+- The `style` column should have `" jazz"` appended to each value and then be reconciled
+- The `key` column should be processed using the following Jython expression, then reconciled; strings like "C-dor" will be left unreconciled
+
+```python
+if value.endswith("maj"):
+    return value[:-3] + " major"
+elif value.endswith("min"):
+    return value[:-3] + " minor"
+else:
+    return value
+```
+
+- The `signature` column should be reconciled with time signatures
+
+## composition_info.csv
+
+- The ids in `compid` are unfortunately slightly misaligned with the ones in Jazztube, which the URI should reference. Apply the following Jython expression to align the ids:
+```python
+num = int(value)
+if num >= 156:  # Step 1: ids from 156 upward move up by one
+    num += 1
+
+# Step 2: compared against the value already shifted by step 1
+if num >= 276:
+    num += 1
+
+# Step 3: compared against the value already shifted by steps 1 and 2
+if num >= 281:
+    num += 1
+
+return str(num)
+```
+- The column `genre` has two possible values: `Original` and `Great American Songbook`. The former should be deleted (it will not be stored); the latter should be reconciled to `Great American Songbook (Q1151397)`.
+- In the column `template`, the value `blues` should be expanded to `twelve-bar blues`; the column should then be reconciled
+- For the column `composer`, do the following steps:
+  1. Split multi-valued cells at `,` (e.g. `"Parker, Gillespie"` should be split in two)
+  2. Split multi-valued cells at `/` (e.g. `"Carmichael/Parish"` should be split in two)
+  3. Trim leading and trailing whitespace
+  4. Create a separate `jazz musician` column, filled entirely with the value `"jazz musician"`. Reconcile the `composer` column using `jazz musician` as the `occupation (P106)`: this should improve the accuracy of reconciliation. Delete the `jazz musician` column after reconciliation.
+  5. After the first reconciliation, do a second reconciliation for the `composer` cells that remain unmatched. This time, create a column filled with the value `"composer"` and use it as the `occupation (P106)`
+  6. Repeat the same process for the profession `"songwriter"`
+
+## record_info.csv
+
+- The column `artist` must have its multi-valued cells split at `/`, and then reconciled
+- The column `label` must have its multi-valued cells split at `/`, and then reconciled. This reconciliation requires slightly more manual verification.
+
+## track_info.csv
+
+- For the column `lineup` (and `instrument` column, which we will create from it), do the following steps:
+
+  1. Split multi-valued cells in the column `lineup` at `;` (e.g. `"Art Pepper (as, cl); Charles Haden (b)"` should be split in two)
+  2. Create a new column `instrument` based on the `lineup` column. Use the following GREL regex: `value.match(/.*\(([^)]+)\).*/)[0]`
+  3. Split multi-valued cell in the column `instrument` at `,` (e.g. `"as, cl"` should be split in two)
+  4. Delete the parenthesis containing the instrument from the `lineup` column (e.g. `"Charles Haden (b)"` becomes `"Charles Haden"`). Use the following GREL regex: `value.replace(/\s*\(.*\)\s*/, "")`
+  5. Trim whitespace for both columns
+  6. Expand the column `instrument` using the following dictionary
+
+  ```python
+  jazz_instruments = {
+  "arr": "",
+  "as": "Alto Saxophone",
+  "b": "Bass",
+  "B": "Bass",
+  "bc": "Bass Clarinet",
+  "bcl": "Bass Clarinet",
+  "bgo": "Baritone Guitar",
+  "bjo": "Banjo",
+  "bs": "Baritone Saxophone",
+  "cga": "Congas",
+  "cl": "Clarinet",
+  "cn": "Conga",
+  "cor": "Cornet",
+  "dr": "Drums",
+  "eb": "Electric Bass",
+  "electric p": "Electric Piano",
+  "fl": "Flute",
+  "flgn": "Flugelhorn",
+  "g": "Guitar",
+  "git": "Guitar",
+  "hca": "Harmonica",
+  "key": "Keyboard",
+  "p": "Piano",
+  "p-tp": "Piccolo Trumpet",
+  "perc": "Percussion",
+  "rhodes": "Rhodes Piano (Electric Piano)",
+  "ss": "Soprano Saxophone",
+  "synth": "Synthesizer",
+  "tb": "Trombone",
+  "tp": "Trumpet",
+  "trp": "Trumpet",
+  "ts": "Tenor Saxophone",
+  "ts-c": "Tenor Saxophone C-melody",
+  "Vc": "Cello",
+  "vcl": "Vocals",
+  "vib": "Vibraphone",
+  "voc": "Vocals"
+  }
+  ```
+
+  7. Reconcile both the `instrument` and `lineup` column
+
+- The `recordingdate` column has many entries that need to be cleaned up:
+
+  1. Apply the following Jython command to clean up most of the badly formatted cells:
+
+  ```python
+  import re
+
+  def extract_date(value):
+      g = re.search(r"(\d{1,2})\s*\.\s*(\d{1,2})\s*\.\s*(\d{4})$", value)
+
+      if g:
+          day = g.group(1).zfill(2)
+          month = g.group(2).zfill(2)
+          year = g.group(3)
+          return year + "-" + month + "-" + day
+      else:
+          return value
+
+  return extract_date(value)
+  ```
+
+  2. Apply the following Jython command to clean up a few remaining cells in the format of `January, 1999`
+
+  ```python
+  import re
+
+  def extract_date(value):
+      month_dict = {
+      "january":   "01",
+      "february":  "02",
+      "march":     "03",
+      "april":     "04",
+      "may":       "05",
+      "june":      "06",
+      "july":      "07",
+      "august":    "08",
+      "september": "09",
+      "october":   "10",
+      "november":  "11",
+      "december":  "12"
+      }
+
+      g = re.search(r"([A-Za-z]+)\s*(\d{4})$", value)
+
+      if g:
+          day = "01"
+          month = month_dict[g.group(1).lower()]
+          year = g.group(2)
+          return year + "-" + month + "-" + day
+      else:
+          return value
+
+  return extract_date(value)
+  ```
+
+  3. Apply the following command to expand years (e.g. `1999`) to a date (e.g. `1999-01-01`)
+
+  ```python
+  import re
+
+  def extract_date(value):
+      g = re.match(r"\d{4}", value)
+
+      if g:
+          return g.group(1)+"-01-01"
+      else:
+          return value
+
+  return extract_date(value)
+  ```
+