Skip to content
Open
15 changes: 14 additions & 1 deletion .github/workflows/extract.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,21 @@ jobs:
with:
java-version: 17
distribution: 'temurin'
# Cache the dependency directories listed under `path` between workflow runs.
# The key combines the runner OS with a hash of extractOWL.groovy, so editing
# the script yields a new key; `restore-keys` lets a run fall back to the most
# recent cache whose key starts with "<os>-grapes-".
- name: Cache Groovy/Maven dependencies
uses: actions/cache@v4
with:
path: |
~/.groovy/grapes
~/.groovy/grapeCache
~/.ivy2/cache
~/.m2/repository
key: ${{ runner.os }}-grapes-${{ hashFiles('extractOWL.groovy') }}
restore-keys: |
${{ runner.os }}-grapes-
- name: Install dependencies
run: sudo apt install groovy
run: sudo apt-get update && sudo apt-get install -y groovy pandoc
# Render index.md to a standalone (-s) HTML document named index.html in the
# working directory; extractOWL.groovy reads index.html when that file exists.
- name: Generate temporary HTML from index.md
run: pandoc index.md -s -o index.html
# Run the extractor, echoing its output to the job log and saving it as
# glossary.owl. `pipefail` makes the step fail when groovy fails; without it
# the pipeline's exit status is tee's, so a broken extraction would pass and
# the job would carry on to the next step.
- name: Extract OWL
run: set -o pipefail; groovy extractOWL.groovy | tee glossary.owl
- name: Commit OWL file
Expand Down
12 changes: 11 additions & 1 deletion extractOWL.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import org.apache.any23.Any23
import org.apache.any23.source.HTTPDocumentSource
import org.apache.any23.source.FileDocumentSource
import org.apache.any23.writer.NTriplesWriter

workspaceRoot = "../ws"
Expand All @@ -16,7 +17,15 @@ url = "https://vhp4safety.github.io/glossary/"
Any23 runner = new Any23();
runner.setHTTPUserAgent("test-user-agent");
httpClient = runner.getHTTPClient()
source = new HTTPDocumentSource(runner.getHTTPClient(), url)
// Resolve the input document, in order of preference:
//   1. the first command-line argument, when it names an existing file;
//   2. index.html in the current working directory, when present;
//   3. the page at `url`, fetched over HTTP.
File cliFile = this.args?.length ? new File(this.args[0]) : null
File defaultFile = new File('index.html')
if (cliFile != null && cliFile.exists()) {
  source = new FileDocumentSource(cliFile)
} else if (defaultFile.exists()) {
  source = new FileDocumentSource(defaultFile)
} else {
  source = new HTTPDocumentSource(runner.getHTTPClient(), url)
}

out = new ByteArrayOutputStream();
handler = new NTriplesWriter(out);
Expand All @@ -33,6 +42,7 @@ rdf.addPrefix(kb, "skos", "http://www.w3.org/2004/02/skos/core#")
rdf.addPrefix(kb, "dc", "http://purl.org/dc/elements/1.1/")
rdf.addPrefix(kb, "dct", "http://purl.org/dc/terms/")
rdf.addPrefix(kb, "ncit", "http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#")
rdf.addPrefix(kb, "chebi", "http://purl.obolibrary.org/obo/chebi/")
rdf.addPrefix(kb, "og", "http://ogp.me/ns#")
rdf.importFromStream(kb, n3Stream, "N3")

Expand Down
Loading