diff --git a/spec_parser/rdf.py b/spec_parser/rdf.py index 1d34e2b..11646f0 100644 --- a/spec_parser/rdf.py +++ b/spec_parser/rdf.py @@ -17,6 +17,8 @@ from rdflib.namespace import DCTERMS, OWL, RDF, RDFS, SH, SKOS, XSD from rdflib.tools.rdf2dot import rdf2dot +from .util import unmarkdown + URI_BASE = "https://spdx.org/rdf/3.0.1/terms/" @@ -59,6 +61,16 @@ def gen_rdf_ontology(model): g.add((node, RDF.type, OWL.Ontology)) g.add((node, OWL.versionIRI, node)) g.add((node, RDFS.label, Literal("System Package Data Exchange (SPDX) Ontology", lang="en"))) + g.add( + ( + node, + SKOS.definition, + Literal( + "This ontology defines the terms and relationships used in the SPDX specification to describe system packages", + lang="en", + ), + ) + ) g.add( ( node, @@ -74,7 +86,16 @@ def gen_rdf_ontology(model): g.add((node, DCTERMS.license, URIRef("https://spdx.org/licenses/Community-Spec-1.0.html"))) g.add((node, DCTERMS.references, URIRef("https://spdx.dev/specifications/"))) g.add((node, DCTERMS.title, Literal("System Package Data Exchange (SPDX) Ontology", lang="en"))) - g.add((node, OMG_ANN.copyright, Literal("Copyright (C) 2024 SPDX Project", lang="en"))) + g.add( + ( + node, + OMG_ANN.copyright, + Literal( + "Copyright (C) 2010-2024, The Linux Foundation and its Contributors, including SPDX Model contributions from OMG and its Contributors.", + lang="en", + ), + ) + ) gen_rdf_classes(model, g) gen_rdf_properties(model, g) @@ -92,8 +113,13 @@ def gen_rdf_classes(model, g): for c in model.classes.values(): node = URIRef(c.iri) g.add((node, RDF.type, OWL.Class)) + if c.name: + g.add((node, RDFS.label, Literal(c.name))) if c.summary: - g.add((node, RDFS.comment, Literal(c.summary, lang="en"))) + g.add((node, RDFS.comment, Literal(unmarkdown(c.summary), lang="en"))) + g.add((node, SKOS.definition, Literal(unmarkdown(c.summary), lang="en"))) + if c.description: + g.add((node, SKOS.note, Literal(unmarkdown(c.description), lang="en"))) parent = c.metadata.get("SubclassOf") if 
parent: pns = "" if parent.startswith("/") else f"/{c.ns.name}/" @@ -164,8 +190,13 @@ def gen_rdf_properties(model, g): if fqname == "/Core/spdxId": continue node = URIRef(p.iri) + if p.name: + g.add((node, RDFS.label, Literal(p.name))) if p.summary: - g.add((node, RDFS.comment, Literal(p.summary, lang="en"))) + g.add((node, RDFS.comment, Literal(unmarkdown(p.summary), lang="en"))) + g.add((node, SKOS.definition, Literal(unmarkdown(p.summary), lang="en"))) + if p.description: + g.add((node, SKOS.note, Literal(unmarkdown(p.description), lang="en"))) if p.metadata["Nature"] == "ObjectProperty": g.add((node, RDF.type, OWL.ObjectProperty)) # to add: g.add((node, RDFS.domain, xxx)) @@ -192,22 +223,33 @@ def gen_rdf_vocabularies(model, g): for v in model.vocabularies.values(): node = URIRef(v.iri) g.add((node, RDF.type, OWL.Class)) + if v.name: + g.add((node, RDFS.label, Literal(v.name))) if v.summary: - g.add((node, RDFS.comment, Literal(v.summary, lang="en"))) + g.add((node, RDFS.comment, Literal(unmarkdown(v.summary), lang="en"))) + g.add((node, SKOS.definition, Literal(unmarkdown(v.summary), lang="en"))) + if v.description: + g.add((node, SKOS.note, Literal(unmarkdown(v.description), lang="en"))) for e, d in v.entries.items(): enode = URIRef(v.iri + "/" + e) g.add((enode, RDF.type, OWL.NamedIndividual)) g.add((enode, RDF.type, node)) g.add((enode, RDFS.label, Literal(e))) - g.add((enode, RDFS.comment, Literal(d, lang="en"))) + g.add((enode, RDFS.comment, Literal(unmarkdown(d), lang="en"))) + g.add((enode, SKOS.definition, Literal(unmarkdown(d), lang="en"))) def gen_rdf_individuals(model, g): for i in model.individuals.values(): node = URIRef(i.iri) g.add((node, RDF.type, OWL.NamedIndividual)) + if i.name: + g.add((node, RDFS.label, Literal(i.name))) if i.summary: - g.add((node, RDFS.comment, Literal(i.summary, lang="en"))) + g.add((node, RDFS.comment, Literal(unmarkdown(i.summary), lang="en"))) + g.add((node, SKOS.definition, Literal(unmarkdown(i.summary), 
lang="en")))
+        if i.description:
+            g.add((node, SKOS.note, Literal(unmarkdown(i.description), lang="en")))
         typ = i.metadata["type"]
         typename = "" if typ.startswith("/") else f"/{i.ns.name}/"
         typename += typ
diff --git a/spec_parser/util.py b/spec_parser/util.py
new file mode 100644
index 0000000..6d332d2
--- /dev/null
+++ b/spec_parser/util.py
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+This module provides utility functions.
+
+Functions:
+    unmarkdown(text: str) -> str: Convert Markdown text to plain text.
+
+Types:
+    ReplaceTuple: A tuple containing a compiled regex pattern and a replacement string or function.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Callable, Pattern, Tuple, Union
+
+ReplaceTuple = Tuple[Pattern, Union[str, Callable[[re.Match], str]]]
+
+
+def _unmarkdown_repl_text_url(match: re.Match) -> str:
+    """
+    Replacement function for Markdown links.
+
+    [text](url) -> text <url>
+    [text](../file.md) -> text
+    [url](url) -> <url>
+    """
+    text = match.group(1)
+    url = match.group(2)
+    if text.lower() == url.lower():
+        return f"<{url}>"
+    if url.startswith(".") or url.endswith(".md"):
+        # Relative or Markdown-file targets: keep only the link text.
+        return text
+    return f"{text} <{url}>"
+
+
+# A list of (regular expression, replacement string/function), ordered by
+# the sequence in which they should be applied to a Markdown text.
+_unmarkdown_rules: list[ReplaceTuple] = [
+    # [text](url) replacements
+    (re.compile(r"\[(.*?)\]\((.*?)\)"), _unmarkdown_repl_text_url),
+    # remove code block markup
+    (re.compile(r"^```\S*\s*\n?", re.MULTILINE), ""),
+    # remove code inline markup
+    (re.compile(r"`([^`]+)`"), r"\1"),
+]
+
+
+def unmarkdown(text: str) -> str:
+    """
+    Convert Markdown text to plain text by applying a series of
+    regular expression replacements.
+
+    Args:
+        text (str): The Markdown text to be converted.
+
+    Returns:
+        str: The plain text result.
+    """
+    for pattern, replacement in _unmarkdown_rules:
+        text = pattern.sub(replacement, text)
+    return text