From d5be7fea980dbcab16baa4356ba95313ae65ff6c Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Wed, 23 Oct 2019 17:01:23 +0200 Subject: [PATCH 01/24] Adds shacl validation to renku doctor --- renku/core/commands/checks/__init__.py | 3 + renku/core/commands/checks/validate_shacl.py | 111 +++++++ renku/core/commands/format/graph.py | 39 +-- renku/core/utils/shacl.py | 46 +++ renku/data/shacl_shape.yml | 289 +++++++++++++++++++ setup.py | 1 + 6 files changed, 464 insertions(+), 25 deletions(-) create mode 100644 renku/core/commands/checks/validate_shacl.py create mode 100644 renku/core/utils/shacl.py create mode 100644 renku/data/shacl_shape.yml diff --git a/renku/core/commands/checks/__init__.py b/renku/core/commands/checks/__init__.py index 6025f1fd4c..48ab9788b8 100644 --- a/renku/core/commands/checks/__init__.py +++ b/renku/core/commands/checks/__init__.py @@ -19,6 +19,7 @@ from .migration import check_dataset_metadata, check_missing_files from .references import check_missing_references +from .validate_shacl import check_project_structure, check_datasets_structure # Checks will be executed in the order as they are listed in __all__. # They are mostly used in ``doctor`` command to inspect broken things. @@ -26,4 +27,6 @@ 'check_dataset_metadata', 'check_missing_files', 'check_missing_references', + 'check_project_structure', + 'check_datasets_structure', ) diff --git a/renku/core/commands/checks/validate_shacl.py b/renku/core/commands/checks/validate_shacl.py new file mode 100644 index 0000000000..c3887a4d3a --- /dev/null +++ b/renku/core/commands/checks/validate_shacl.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Check KG structure using SHACL.""" +import yaml +from pyld import jsonld as ld +from rdflib.namespace import Namespace +from rdflib.term import BNode + +from renku.core.utils.shacl import validate_graph +from renku.core.models.jsonld import NoDatesSafeLoader +from renku.core.commands.echo import WARNING + + +def _shacl_graph_to_string(graph): + """Converts a shacl validation graph into human readable format.""" + sh = Namespace('http://www.w3.org/ns/shacl#') + + problems = [] + + for _, result in graph.subject_objects(sh.result): + path = graph.value(result, sh.resultPath) + res = graph.value(result, sh.resultMessage) + + if res: + message = '{}: {}'.format(path, res) + else: + kind = graph.value(result, sh.sourceConstraintComponent) + focusNode = graph.value(result, sh.focusNode) + + if isinstance(focusNode, BNode): + focusNode = '' + + message = '{}: Type: {}, Node ID: {}'.format(path, kind, focusNode) + + problems.append(message) + + return '\n\t'.join(problems) + + +def check_project_structure(client): + """Validate project metadata against SHACL.""" + project_path = client.renku_metadata_path + + conform, graph, _ = check_shacl_structure(project_path) + + if conform: + return True, None + + problems = ( + WARNING + 'Invalid structure of project metadata\n\t' + + _shacl_graph_to_string(graph) + ) + + return False, problems + + +def check_datasets_structure(client): + """Validate dataset metadata against SHACL.""" + ok = True + + problems = WARNING + 'Invalid structure of dataset metadata\n' + + for path in 
client.renku_datasets_path.rglob(client.METADATA): + try: + conform, graph, _ = check_shacl_structure(path) + except (Exception, BaseException) as e: + problems += 'Couldn\'t validate {}: {}\n\n'.format(path, e) + continue + + if conform: + continue + + ok = False + + problems += str(path) + '\n\t' + _shacl_graph_to_string(graph) + '\n\n' + + if ok: + return True, None + + return False, problems + + +def check_shacl_structure(path): + """Validates all metadata aginst the SHACL schema.""" + with path.open(mode='r') as fp: + source = yaml.load(fp, Loader=NoDatesSafeLoader) or {} + + rdf = ld.to_rdf( + source, + options={ + 'format': 'application/n-quads', + 'produceGeneralizedRdf': True + } + ) + + return validate_graph(rdf) diff --git a/renku/core/commands/format/graph.py b/renku/core/commands/format/graph.py index 4dc3eec0f1..06d6cbd05f 100644 --- a/renku/core/commands/format/graph.py +++ b/renku/core/commands/format/graph.py @@ -43,21 +43,28 @@ def _jsonld(graph, format, *args, **kwargs): return json.dumps(output, indent=2) -def dot(graph, simple=True, debug=False, landscape=False): - """Format graph as a dot file.""" - import sys +def _conjunctive_graph(graph): + """Convert a renku ``Graph`` to an rdflib ``ConjunctiveGraph``.""" from rdflib import ConjunctiveGraph from rdflib.plugin import register, Parser - from rdflib.tools.rdf2dot import rdf2dot register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser') - g = ConjunctiveGraph().parse( + return ConjunctiveGraph().parse( data=_jsonld(graph, 'expand'), format='json-ld', ) + +def dot(graph, simple=True, debug=False, landscape=False): + """Format graph as a dot file.""" + import sys + + from rdflib.tools.rdf2dot import rdf2dot + + g = _conjunctive_graph(graph) + g.bind('prov', 'http://www.w3.org/ns/prov#') g.bind('foaf', 'http://xmlns.com/foaf/0.1/') g.bind('wfdesc', 'http://purl.org/wf4ever/wfdesc#') @@ -328,31 +335,13 @@ def jsonld_graph(graph): def nt(graph): """Format graph as n-tuples.""" - from 
rdflib import ConjunctiveGraph - from rdflib.plugin import register, Parser - - register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser') - - click.echo( - ConjunctiveGraph().parse( - data=_jsonld(graph, 'expand'), - format='json-ld', - ).serialize(format='nt') - ) + click.echo(_conjunctive_graph(graph).serialize(format='nt')) def rdf(graph): """Output the graph as RDF.""" - from rdflib import ConjunctiveGraph - from rdflib.plugin import register, Parser - - register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser') - click.echo( - ConjunctiveGraph().parse( - data=_jsonld(graph, 'expand'), - format='json-ld', - ).serialize(format='application/rdf+xml') + _conjunctive_graph(graph).serialize(format='application/rdf+xml') ) diff --git a/renku/core/utils/shacl.py b/renku/core/utils/shacl.py new file mode 100644 index 0000000000..3c0dac0c67 --- /dev/null +++ b/renku/core/utils/shacl.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2018-2019- Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""JSON-LD SHACL validation.""" + +import json +from pkg_resources import resource_string + +from pyshacl import validate +import yaml + + +def validate_graph(graph, shacl_path=None, format='nquads'): + """Validate the current graph with a SHACL schema. + + uses default schema if not supplied. 
+ """ + if not shacl_path: + shacl_path = resource_string('renku', 'data/shacl_shape.yml') + + shacl = json.dumps(yaml.safe_load(shacl_path)) + + return validate( + graph, + shacl_graph=shacl, + inference='rdfs', + meta_shacl=True, + debug=False, + data_graph_format=format, + shacl_graph_format='json-ld', + advanced=True + ) diff --git a/renku/data/shacl_shape.yml b/renku/data/shacl_shape.yml new file mode 100644 index 0000000000..caa70cf8f9 --- /dev/null +++ b/renku/data/shacl_shape.yml @@ -0,0 +1,289 @@ +"@context": + rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# + sh: http://www.w3.org/ns/shacl# + xsd: http://www.w3.org/2001/XMLSchema# + schema: http://schema.org/ + foaf: http://xmlns.com/foaf/0.1/ + prov: http://www.w3.org/ns/prov# + closed: + "@id": sh:closed + "@type": http://www.w3.org/2001/XMLSchema#boolean + datatype: + "@id": sh:datatype + "@type": "@id" + ignoredProperties: + "@id": sh:ignoredProperties + "@container": "@list" + minCount: sh:minCount + maxCount: sh:maxCount + node: + "@id": sh:node + "@type": "@id" + nodeKind: + "@id": sh:nodeKind + "@type": "@id" + property: sh:property + path: + "@id": sh:path + "@type": "@id" + targetClass: + "@id": sh:targetClass + "@type": "@id" +"@graph": + - "@id": _:oldProjecShape + "@type": sh:NodeShape + targetClass: foaf:Project + property: + - nodeKind: sh:Literal + path: ex:CheckOldProjectMetadata + minCount: 99999 + maxCount: 99999 + sh:message: Project should be schema:Project, not foaf:Project + - "@id": _:projecShape + "@type": sh:NodeShape + ignoredProperties: + - "@id": rdf:type + closed: true + targetClass: schema:Project + property: + - nodeKind: sh:Literal + path: schema:dateCreated + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + sh:lessThanOrEquals: + "@id": schema:dateUpdated + - nodeKind: sh:Literal + path: schema:dateUpdated + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:schemaVersion + datatype: + "@id": xsd:string + 
minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:name + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - path: schema:creator + node: _:creatorShape + minCount: 1 + - "@id": _:creatorShape + "@type": sh:NodeShape + ignoredProperties: + - "@id": rdf:type + closed: true + targetClass: schema:Person + property: + - nodeKind: sh:Literal + path: schema:name + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:email + datatype: + "@id": xsd:string + maxCount: 1 + - nodeKind: sh:Literal + path: schema:alternateName + datatype: + "@id": xsd:string + - nodeKind: sh:Literal + path: schema:affiliation + datatype: + "@id": xsd:string + - "@id": _:datasetShape + "@type": sh:NodeShape + ignoredProperties: + - "@id": rdf:type + closed: true + targetClass: schema:Dataset + sh:filterShape: + sh:not: + sh:targetObjectsOf: + - "@id": schema:Dataset + property: + - nodeKind: sh:Literal + path: schema:isBasedOn + datatype: + "@id": xsd:string + maxCount: 1 + - nodeKind: sh:Literal + path: schema:dateCreated + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + sh:lessThanOrEquals: + "@id": schema:datePublished + - path: schema:creator + node: _:creatorShape + minCount: 1 + - nodeKind: sh:Literal + path: schema:datePublished + datatype: + "@id": xsd:string + maxCount: 1 + - nodeKind: sh:Literal + path: schema:description + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:identifier + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:keywords + datatype: + "@id": xsd:string + - nodeKind: sh:Literal + path: schema:license + datatype: + "@id": xsd:string + - nodeKind: sh:Literal + path: schema:name + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: prov:atLocation + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:sameAs 
+ datatype: + "@id": xsd:string + - nodeKind: sh:Literal + path: schema:url + datatype: + "@id": xsd:string + - nodeKind: sh:Literal + path: schema:version + datatype: + "@id": xsd:string + - path: schema:isPartOf + node: _:projectShape + minCount: 1 + maxCount: 1 + - path: schema:subjectOf + node: _:datasetTagShape + - path: schema:hasPart + node: _:datasetFileShape + - path: schema:inLanguage + node: _:inLanguageShape + - path: schema:isPartOf + node: _:projectShape + minCount: 1 + - "@id": _:inLanguageShape + "@type": sh:NodeShape + ignoredProperties: + - "@id": rdf:type + closed: true + targetClass: schema:Language + property: + - nodeKind: sh:Literal + path: schema:name + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:alternateName + datatype: + "@id": xsd:string + - "@id": _:datasetFileShape + "@type": sh:NodeShape + ignoredProperties: + - "@id": rdf:type + closed: true + targetClass: schema:DigitalDocument + property: + - nodeKind: sh:Literal + path: schema:name + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:dateCreated + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:dateUpdated + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:url + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:version + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - path: schema:isPartOf + node: _:projectShape + minCount: 1 + - path: schema:creator + node: _:creatorShape + minCount: 1 + - "@id": _:datasetTagShape + "@type": sh:NodeShape + ignoredProperties: + - "@id": rdf:type + closed: true + targetClass: schema:PublicationEvent + property: + - nodeKind: sh:Literal + path: schema:name + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:description + datatype: + "@id": 
xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:startDate + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:location + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 + - nodeKind: sh:Literal + path: schema:about + datatype: + "@id": xsd:string + minCount: 1 + maxCount: 1 diff --git a/setup.py b/setup.py index e4d0caa120..18650e5ff0 100644 --- a/setup.py +++ b/setup.py @@ -87,6 +87,7 @@ 'PyYAML>=3.12', 'pyld>=1.0.3', 'pyOpenSSL>=19.0.0', + 'pyshacl>=0.11.3', 'python-dateutil>=2.6.1', 'python-editor>=1.0.4', 'rdflib-jsonld>=0.4.0', From f8625b33f3d8e306a757f382666584c3c774de9f Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Fri, 25 Oct 2019 12:01:35 +0200 Subject: [PATCH 02/24] Updates SHACL schema to validate current KG --- renku/core/commands/checks/validate_shacl.py | 8 ++--- renku/core/commands/format/graph.py | 1 - renku/core/utils/shacl.py | 4 +-- renku/data/shacl_shape.yml | 33 ++++++++++++++------ 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/renku/core/commands/checks/validate_shacl.py b/renku/core/commands/checks/validate_shacl.py index c3887a4d3a..73385014a1 100644 --- a/renku/core/commands/checks/validate_shacl.py +++ b/renku/core/commands/checks/validate_shacl.py @@ -21,9 +21,9 @@ from rdflib.namespace import Namespace from rdflib.term import BNode -from renku.core.utils.shacl import validate_graph -from renku.core.models.jsonld import NoDatesSafeLoader from renku.core.commands.echo import WARNING +from renku.core.models.jsonld import NoDatesSafeLoader +from renku.core.utils.shacl import validate_graph def _shacl_graph_to_string(graph): @@ -56,7 +56,7 @@ def check_project_structure(client): """Validate project metadata against SHACL.""" project_path = client.renku_metadata_path - conform, graph, _ = check_shacl_structure(project_path) + conform, graph, t = check_shacl_structure(project_path) if conform: return True, None @@ -77,7 +77,7 @@ 
def check_datasets_structure(client): for path in client.renku_datasets_path.rglob(client.METADATA): try: - conform, graph, _ = check_shacl_structure(path) + conform, graph, t = check_shacl_structure(path) except (Exception, BaseException) as e: problems += 'Couldn\'t validate {}: {}\n\n'.format(path, e) continue diff --git a/renku/core/commands/format/graph.py b/renku/core/commands/format/graph.py index 06d6cbd05f..0e8522fe30 100644 --- a/renku/core/commands/format/graph.py +++ b/renku/core/commands/format/graph.py @@ -45,7 +45,6 @@ def _jsonld(graph, format, *args, **kwargs): def _conjunctive_graph(graph): """Convert a renku ``Graph`` to an rdflib ``ConjunctiveGraph``.""" - from rdflib import ConjunctiveGraph from rdflib.plugin import register, Parser diff --git a/renku/core/utils/shacl.py b/renku/core/utils/shacl.py index 3c0dac0c67..ab970b665f 100644 --- a/renku/core/utils/shacl.py +++ b/renku/core/utils/shacl.py @@ -18,10 +18,10 @@ """JSON-LD SHACL validation.""" import json -from pkg_resources import resource_string -from pyshacl import validate import yaml +from pkg_resources import resource_string +from pyshacl import validate def validate_graph(graph, shacl_path=None, format='nquads'): diff --git a/renku/data/shacl_shape.yml b/renku/data/shacl_shape.yml index caa70cf8f9..9849a9578b 100644 --- a/renku/data/shacl_shape.yml +++ b/renku/data/shacl_shape.yml @@ -29,7 +29,17 @@ targetClass: "@id": sh:targetClass "@type": "@id" + target: + "@id": sh:target + "@type": "@id" "@graph": + - "@id": "schema:" + sh:declare: + - sh:prefix: + - "@value": "schema" + sh:namespace: + - "@value": "http://schema.org/" + "@type": xsd:anyURI - "@id": _:oldProjecShape "@type": sh:NodeShape targetClass: foaf:Project @@ -39,7 +49,7 @@ minCount: 99999 maxCount: 99999 sh:message: Project should be schema:Project, not foaf:Project - - "@id": _:projecShape + - "@id": _:projectShape "@type": sh:NodeShape ignoredProperties: - "@id": rdf:type @@ -105,12 +115,19 @@ "@type": sh:NodeShape 
ignoredProperties: - "@id": rdf:type + - "@id": schema:license closed: true - targetClass: schema:Dataset - sh:filterShape: - sh:not: - sh:targetObjectsOf: - - "@id": schema:Dataset + target: + - "@type": sh:SPARQLTarget + sh:prefixes: + - "@id": "schema:" + sh:select: + - "@value": | + SELECT ?this + WHERE { + ?this a schema:Dataset . + MINUS { ?x schema:license ?this .} + } property: - nodeKind: sh:Literal path: schema:isBasedOn @@ -149,10 +166,6 @@ path: schema:keywords datatype: "@id": xsd:string - - nodeKind: sh:Literal - path: schema:license - datatype: - "@id": xsd:string - nodeKind: sh:Literal path: schema:name datatype: From 6a83b7b9457e9459f189a465165d153a0e1aabc9 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 31 Oct 2019 16:46:04 +0100 Subject: [PATCH 03/24] Adds Unit tests for dataset and project jsonld structure --- MANIFEST.in | 3 +- renku/core/utils/shacl.py | 10 +- renku/data/shacl_shape.json | 485 ++++++++++++++++++++++++ renku/data/shacl_shape.yml | 302 --------------- tests/core/models/test_shacl_schema.py | 95 +++++ tests/fixtures/force_dataset_shacl.json | 53 +++ tests/fixtures/force_project_shacl.json | 53 +++ 7 files changed, 693 insertions(+), 308 deletions(-) create mode 100644 renku/data/shacl_shape.json delete mode 100644 renku/data/shacl_shape.yml create mode 100644 tests/core/models/test_shacl_schema.py create mode 100644 tests/fixtures/force_dataset_shacl.json create mode 100644 tests/fixtures/force_project_shacl.json diff --git a/MANIFEST.in b/MANIFEST.in index ab3e9b5e3c..8c9e058fa7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -56,6 +56,7 @@ recursive-include renku *.html recursive-include renku *.sh recursive-include renku *.txt recursive-include renku *.yml +recursive-include renku *.json recursive-include renku Dockerfile -recursive-include tests *.py *.gz *.yml +recursive-include tests *.py *.gz *.yml *.json prune .github diff --git a/renku/core/utils/shacl.py b/renku/core/utils/shacl.py index ab970b665f..b4acfb1212 
100644 --- a/renku/core/utils/shacl.py +++ b/renku/core/utils/shacl.py @@ -19,7 +19,6 @@ import json -import yaml from pkg_resources import resource_string from pyshacl import validate @@ -29,10 +28,11 @@ def validate_graph(graph, shacl_path=None, format='nquads'): uses default schema if not supplied. """ - if not shacl_path: - shacl_path = resource_string('renku', 'data/shacl_shape.yml') - - shacl = json.dumps(yaml.safe_load(shacl_path)) + if shacl_path: + with open(shacl_path, 'r', encoding='utf-8') as f: + shacl = f.read() + else: + shacl = resource_string('renku', 'data/shacl_shape.json') return validate( graph, diff --git a/renku/data/shacl_shape.json b/renku/data/shacl_shape.json new file mode 100644 index 0000000000..f905b30bd2 --- /dev/null +++ b/renku/data/shacl_shape.json @@ -0,0 +1,485 @@ +{ + "@context": { + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "sh": "http://www.w3.org/ns/shacl#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "schema": "http://schema.org/", + "foaf": "http://xmlns.com/foaf/0.1/", + "prov": "http://www.w3.org/ns/prov#", + "closed": { + "@id": "sh:closed", + "@type": "http://www.w3.org/2001/XMLSchema#boolean" + }, + "datatype": { + "@id": "sh:datatype", + "@type": "@id" + }, + "ignoredProperties": { + "@id": "sh:ignoredProperties", + "@container": "@list" + }, + "minCount": "sh:minCount", + "maxCount": "sh:maxCount", + "node": { + "@id": "sh:node", + "@type": "@id" + }, + "nodeKind": { + "@id": "sh:nodeKind", + "@type": "@id" + }, + "property": "sh:property", + "path": { + "@id": "sh:path", + "@type": "@id" + }, + "targetClass": { + "@id": "sh:targetClass", + "@type": "@id" + }, + "target": { + "@id": "sh:target", + "@type": "@id" + } + }, + "@graph": [ + { + "@id": "schema:", + "sh:declare": [ + { + "sh:prefix": [ + { + "@value": "schema" + } + ], + "sh:namespace": [ + { + "@value": "http://schema.org/", + "@type": "xsd:anyURI" + } + ] + } + ] + }, + { + "@id": "_:oldProjecShape", + "@type": "sh:NodeShape", + 
"targetClass": "foaf:Project", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "ex:CheckOldProjectMetadata", + "minCount": 99999, + "maxCount": 99999, + "sh:message": "Project should be schema:Project, not foaf:Project" + } + ] + }, + { + "@id": "_:projectShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "schema:Project", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:dateCreated", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1, + "sh:lessThanOrEquals": { + "@id": "schema:dateUpdated" + } + }, + { + "nodeKind": "sh:Literal", + "path": "schema:dateUpdated", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:schemaVersion", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "path": "schema:creator", + "node": "_:creatorShape", + "minCount": 1 + } + ] + }, + { + "@id": "_:creatorShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "schema:Person", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:email", + "datatype": { + "@id": "xsd:string" + }, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:alternateName", + "datatype": { + "@id": "xsd:string" + } + }, + { + "nodeKind": "sh:Literal", + "path": "schema:affiliation", + "datatype": { + "@id": "xsd:string" + } + } + ] + }, + { + "@id": "_:datasetShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + }, + { + "@id": "schema:license" + } + ], + "closed": true, 
+ "target": [ + { + "@type": "sh:SPARQLTarget", + "sh:prefixes": [ + { + "@id": "schema:" + } + ], + "sh:select": [ + { + "@value": "SELECT ?this\nWHERE {\n ?this a schema:Dataset .\n MINUS { ?x schema:license ?this .}\n}\n" + } + ] + } + ], + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:isBasedOn", + "datatype": { + "@id": "xsd:string" + }, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:dateCreated", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1, + "sh:lessThanOrEquals": { + "@id": "schema:datePublished" + } + }, + { + "path": "schema:creator", + "node": "_:creatorShape", + "minCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:datePublished", + "datatype": { + "@id": "xsd:string" + }, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:description", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:identifier", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:keywords", + "datatype": { + "@id": "xsd:string" + } + }, + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "prov:atLocation", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:sameAs", + "datatype": { + "@id": "xsd:string" + } + }, + { + "nodeKind": "sh:Literal", + "path": "schema:url", + "datatype": { + "@id": "xsd:string" + } + }, + { + "nodeKind": "sh:Literal", + "path": "schema:version", + "datatype": { + "@id": "xsd:string" + } + }, + { + "path": "schema:isPartOf", + "node": "_:projectShape", + "minCount": 1, + "maxCount": 1 + }, + { + "path": "schema:subjectOf", + "node": "_:datasetTagShape" + }, + { + "path": "schema:hasPart", + 
"node": "_:datasetFileShape" + }, + { + "path": "schema:inLanguage", + "node": "_:inLanguageShape" + }, + { + "path": "schema:isPartOf", + "node": "_:projectShape", + "minCount": 1 + } + ] + }, + { + "@id": "_:inLanguageShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "schema:Language", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:alternateName", + "datatype": { + "@id": "xsd:string" + } + } + ] + }, + { + "@id": "_:datasetFileShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "schema:DigitalDocument", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:dateCreated", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:dateUpdated", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:url", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:version", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "path": "schema:isPartOf", + "node": "_:projectShape", + "minCount": 1 + }, + { + "path": "schema:creator", + "node": "_:creatorShape", + "minCount": 1 + } + ] + }, + { + "@id": "_:datasetTagShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "schema:PublicationEvent", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": 
"xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:description", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:startDate", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:location", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:about", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + } + ] + } + ] + } diff --git a/renku/data/shacl_shape.yml b/renku/data/shacl_shape.yml deleted file mode 100644 index 9849a9578b..0000000000 --- a/renku/data/shacl_shape.yml +++ /dev/null @@ -1,302 +0,0 @@ -"@context": - rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# - sh: http://www.w3.org/ns/shacl# - xsd: http://www.w3.org/2001/XMLSchema# - schema: http://schema.org/ - foaf: http://xmlns.com/foaf/0.1/ - prov: http://www.w3.org/ns/prov# - closed: - "@id": sh:closed - "@type": http://www.w3.org/2001/XMLSchema#boolean - datatype: - "@id": sh:datatype - "@type": "@id" - ignoredProperties: - "@id": sh:ignoredProperties - "@container": "@list" - minCount: sh:minCount - maxCount: sh:maxCount - node: - "@id": sh:node - "@type": "@id" - nodeKind: - "@id": sh:nodeKind - "@type": "@id" - property: sh:property - path: - "@id": sh:path - "@type": "@id" - targetClass: - "@id": sh:targetClass - "@type": "@id" - target: - "@id": sh:target - "@type": "@id" -"@graph": - - "@id": "schema:" - sh:declare: - - sh:prefix: - - "@value": "schema" - sh:namespace: - - "@value": "http://schema.org/" - "@type": xsd:anyURI - - "@id": _:oldProjecShape - "@type": sh:NodeShape - targetClass: foaf:Project - property: - - nodeKind: sh:Literal - path: ex:CheckOldProjectMetadata - minCount: 99999 - maxCount: 99999 - sh:message: Project should be schema:Project, not foaf:Project - - 
"@id": _:projectShape - "@type": sh:NodeShape - ignoredProperties: - - "@id": rdf:type - closed: true - targetClass: schema:Project - property: - - nodeKind: sh:Literal - path: schema:dateCreated - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - sh:lessThanOrEquals: - "@id": schema:dateUpdated - - nodeKind: sh:Literal - path: schema:dateUpdated - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:schemaVersion - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:name - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - path: schema:creator - node: _:creatorShape - minCount: 1 - - "@id": _:creatorShape - "@type": sh:NodeShape - ignoredProperties: - - "@id": rdf:type - closed: true - targetClass: schema:Person - property: - - nodeKind: sh:Literal - path: schema:name - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:email - datatype: - "@id": xsd:string - maxCount: 1 - - nodeKind: sh:Literal - path: schema:alternateName - datatype: - "@id": xsd:string - - nodeKind: sh:Literal - path: schema:affiliation - datatype: - "@id": xsd:string - - "@id": _:datasetShape - "@type": sh:NodeShape - ignoredProperties: - - "@id": rdf:type - - "@id": schema:license - closed: true - target: - - "@type": sh:SPARQLTarget - sh:prefixes: - - "@id": "schema:" - sh:select: - - "@value": | - SELECT ?this - WHERE { - ?this a schema:Dataset . 
- MINUS { ?x schema:license ?this .} - } - property: - - nodeKind: sh:Literal - path: schema:isBasedOn - datatype: - "@id": xsd:string - maxCount: 1 - - nodeKind: sh:Literal - path: schema:dateCreated - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - sh:lessThanOrEquals: - "@id": schema:datePublished - - path: schema:creator - node: _:creatorShape - minCount: 1 - - nodeKind: sh:Literal - path: schema:datePublished - datatype: - "@id": xsd:string - maxCount: 1 - - nodeKind: sh:Literal - path: schema:description - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:identifier - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:keywords - datatype: - "@id": xsd:string - - nodeKind: sh:Literal - path: schema:name - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: prov:atLocation - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:sameAs - datatype: - "@id": xsd:string - - nodeKind: sh:Literal - path: schema:url - datatype: - "@id": xsd:string - - nodeKind: sh:Literal - path: schema:version - datatype: - "@id": xsd:string - - path: schema:isPartOf - node: _:projectShape - minCount: 1 - maxCount: 1 - - path: schema:subjectOf - node: _:datasetTagShape - - path: schema:hasPart - node: _:datasetFileShape - - path: schema:inLanguage - node: _:inLanguageShape - - path: schema:isPartOf - node: _:projectShape - minCount: 1 - - "@id": _:inLanguageShape - "@type": sh:NodeShape - ignoredProperties: - - "@id": rdf:type - closed: true - targetClass: schema:Language - property: - - nodeKind: sh:Literal - path: schema:name - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:alternateName - datatype: - "@id": xsd:string - - "@id": _:datasetFileShape - "@type": sh:NodeShape - ignoredProperties: - - "@id": rdf:type - closed: true - targetClass: 
schema:DigitalDocument - property: - - nodeKind: sh:Literal - path: schema:name - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:dateCreated - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:dateUpdated - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:url - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:version - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - path: schema:isPartOf - node: _:projectShape - minCount: 1 - - path: schema:creator - node: _:creatorShape - minCount: 1 - - "@id": _:datasetTagShape - "@type": sh:NodeShape - ignoredProperties: - - "@id": rdf:type - closed: true - targetClass: schema:PublicationEvent - property: - - nodeKind: sh:Literal - path: schema:name - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:description - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:startDate - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:location - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 - - nodeKind: sh:Literal - path: schema:about - datatype: - "@id": xsd:string - minCount: 1 - maxCount: 1 diff --git a/tests/core/models/test_shacl_schema.py b/tests/core/models/test_shacl_schema.py new file mode 100644 index 0000000000..5c0ebe50d7 --- /dev/null +++ b/tests/core/models/test_shacl_schema.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017-2019- Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""test KG against SHACL shape.""" + +from pathlib import Path + +from pyld import jsonld as ld + +from renku.cli import cli +from renku.core.utils.shacl import validate_graph + + +def test_dataset_shacl(tmpdir, runner, project, client): + """Test dataset metadata structure.""" + path = Path( + __file__ + ).parent.parent.parent / 'fixtures' / 'force_dataset_shacl.json' + + runner.invoke(cli, ['dataset', 'create', 'dataset']) + + paths = [] + for i in range(3): + new_file = tmpdir.join('file_{0}'.format(i)) + new_file.write(str(i)) + paths.append(str(new_file)) + + # add data + runner.invoke( + cli, + ['dataset', 'add', 'dataset'] + paths, + catch_exceptions=False, + ) + + runner.invoke( + cli, + ['dataset', 'tag', 'dataset', '1.0'], + catch_exceptions=False, + ) + + with client.with_dataset('dataset') as dataset: + g = dataset.asjsonld() + rdf = ld.to_rdf( + g, + options={ + 'format': 'application/n-quads', + 'produceGeneralizedRdf': False + } + ) + r, _, t = validate_graph(rdf, shacl_path=path) + assert r is True, t + + r, _, t = validate_graph(rdf) + assert r is True, t + + +def test_project_shacl(project, client): + """Test project metadata structure.""" + from renku.core.models.creators import Creator + from renku.core.models.projects import Project + + path = Path( + __file__ + ).parent.parent.parent / 'fixtures' / 'force_project_shacl.json' + + project = client.project + project.creator = Creator(email='johndoe@example.com', name='Johnny Doe') + + g = project.asjsonld() + rdf = ld.to_rdf( + g, + options={ + 'format': 'application/n-quads', + 
'produceGeneralizedRdf': False + } + ) + r, _, t = validate_graph(rdf, shacl_path=path) + assert r is True, t + + r, _, t = validate_graph(rdf) + assert r is True, t diff --git a/tests/fixtures/force_dataset_shacl.json b/tests/fixtures/force_dataset_shacl.json new file mode 100644 index 0000000000..3691bbe6dc --- /dev/null +++ b/tests/fixtures/force_dataset_shacl.json @@ -0,0 +1,53 @@ +{ + "@context": { + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "sh": "http://www.w3.org/ns/shacl#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "schema": "http://schema.org/", + "foaf": "http://xmlns.com/foaf/0.1/", + "prov": "http://www.w3.org/ns/prov#", + "minCount": "sh:minCount", + "property": "sh:property", + "target": { + "@id": "sh:target", + "@type": "@id" + } + }, + "@graph": [ + { + "@id": "schema:", + "sh:declare": [ + { + "sh:prefix": [ + { + "@value": "schema" + } + ], + "sh:namespace": [ + { + "@value": "http://schema.org/", + "@type": "xsd:anyURI" + } + ] + } + ] + }, + { + "@id": "_:forceDatasetShape", + "@type": "sh:NodeShape", + "targetNode": "schema:Dataset", + "property": [ + { + "sh:path": [ + { + "sh:inversePath": { + "@id": "rdf:type" + } + } + ], + "minCount": 1 + } + ] + } + ] +} diff --git a/tests/fixtures/force_project_shacl.json b/tests/fixtures/force_project_shacl.json new file mode 100644 index 0000000000..a7a6989746 --- /dev/null +++ b/tests/fixtures/force_project_shacl.json @@ -0,0 +1,53 @@ +{ + "@context": { + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "sh": "http://www.w3.org/ns/shacl#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "schema": "http://schema.org/", + "foaf": "http://xmlns.com/foaf/0.1/", + "prov": "http://www.w3.org/ns/prov#", + "minCount": "sh:minCount", + "property": "sh:property", + "target": { + "@id": "sh:target", + "@type": "@id" + } + }, + "@graph": [ + { + "@id": "schema:", + "sh:declare": [ + { + "sh:prefix": [ + { + "@value": "schema" + } + ], + "sh:namespace": [ + { + "@value": 
"http://schema.org/", + "@type": "xsd:anyURI" + } + ] + } + ] + }, + { + "@id": "_:forceProjectShape", + "@type": "sh:NodeShape", + "targetNode": "schema:Project", + "property": [ + { + "sh:path": [ + { + "sh:inversePath": { + "@id": "rdf:type" + } + } + ], + "minCount": 1 + } + ] + } + ] +} From 7a089044e92dda9eaceec283a8fdc4dfb77add01 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Fri, 1 Nov 2019 11:31:03 +0100 Subject: [PATCH 04/24] Improves SHACL tests for project and dataset jsonld --- tests/core/models/test_shacl_schema.py | 21 +++++++++++-- tests/fixtures/force_dataset_shacl.json | 35 +++------------------ tests/fixtures/force_datasetfile_shacl.json | 26 +++++++++++++++ tests/fixtures/force_datasettag_shacl.json | 26 +++++++++++++++ tests/fixtures/force_project_shacl.json | 35 +++------------------ 5 files changed, 78 insertions(+), 65 deletions(-) create mode 100644 tests/fixtures/force_datasetfile_shacl.json create mode 100644 tests/fixtures/force_datasettag_shacl.json diff --git a/tests/core/models/test_shacl_schema.py b/tests/core/models/test_shacl_schema.py index 5c0ebe50d7..fd8ab6c586 100644 --- a/tests/core/models/test_shacl_schema.py +++ b/tests/core/models/test_shacl_schema.py @@ -27,10 +27,18 @@ def test_dataset_shacl(tmpdir, runner, project, client): """Test dataset metadata structure.""" - path = Path( + force_dataset_path = Path( __file__ ).parent.parent.parent / 'fixtures' / 'force_dataset_shacl.json' + force_datasetfile_path = Path( + __file__ + ).parent.parent.parent / 'fixtures' / 'force_datasetfile_shacl.json' + + force_datasettag_path = Path( + __file__ + ).parent.parent.parent / 'fixtures' / 'force_datasettag_shacl.json' + runner.invoke(cli, ['dataset', 'create', 'dataset']) paths = [] @@ -58,10 +66,17 @@ def test_dataset_shacl(tmpdir, runner, project, client): g, options={ 'format': 'application/n-quads', - 'produceGeneralizedRdf': False + 'produceGeneralizedRdf': True } ) - r, _, t = validate_graph(rdf, shacl_path=path) + + r, _, 
t = validate_graph(rdf, shacl_path=force_dataset_path) + assert r is True, t + + r, _, t = validate_graph(rdf, shacl_path=force_datasetfile_path) + assert r is True, t + + r, _, t = validate_graph(rdf, shacl_path=force_datasettag_path) assert r is True, t r, _, t = validate_graph(rdf) diff --git a/tests/fixtures/force_dataset_shacl.json b/tests/fixtures/force_dataset_shacl.json index 3691bbe6dc..8dd3078843 100644 --- a/tests/fixtures/force_dataset_shacl.json +++ b/tests/fixtures/force_dataset_shacl.json @@ -2,41 +2,14 @@ "@context": { "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "sh": "http://www.w3.org/ns/shacl#", - "xsd": "http://www.w3.org/2001/XMLSchema#", - "schema": "http://schema.org/", - "foaf": "http://xmlns.com/foaf/0.1/", - "prov": "http://www.w3.org/ns/prov#", - "minCount": "sh:minCount", - "property": "sh:property", - "target": { - "@id": "sh:target", - "@type": "@id" - } + "schema": "http://schema.org/" }, "@graph": [ - { - "@id": "schema:", - "sh:declare": [ - { - "sh:prefix": [ - { - "@value": "schema" - } - ], - "sh:namespace": [ - { - "@value": "http://schema.org/", - "@type": "xsd:anyURI" - } - ] - } - ] - }, { "@id": "_:forceDatasetShape", "@type": "sh:NodeShape", - "targetNode": "schema:Dataset", - "property": [ + "sh:targetNode": "schema:Dataset", + "sh:property": [ { "sh:path": [ { @@ -45,7 +18,7 @@ } } ], - "minCount": 1 + "sh:minCount": 1 } ] } diff --git a/tests/fixtures/force_datasetfile_shacl.json b/tests/fixtures/force_datasetfile_shacl.json new file mode 100644 index 0000000000..34e4993dd5 --- /dev/null +++ b/tests/fixtures/force_datasetfile_shacl.json @@ -0,0 +1,26 @@ +{ + "@context": { + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "sh": "http://www.w3.org/ns/shacl#", + "schema": "http://schema.org/" + }, + "@graph": [ + { + "@id": "_:forceDatasetShape", + "@type": "sh:NodeShape", + "sh:targetNode": "schema:DigitalDocument", + "sh:property": [ + { + "sh:path": [ + { + "sh:inversePath": [{ + "@id": "rdf:type" + }] 
+ } + ], + "sh:minCount": 1 + } + ] + } + ] +} diff --git a/tests/fixtures/force_datasettag_shacl.json b/tests/fixtures/force_datasettag_shacl.json new file mode 100644 index 0000000000..e5987723f0 --- /dev/null +++ b/tests/fixtures/force_datasettag_shacl.json @@ -0,0 +1,26 @@ +{ + "@context": { + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "sh": "http://www.w3.org/ns/shacl#", + "schema": "http://schema.org/" + }, + "@graph": [ + { + "@id": "_:forceDatasetShape", + "@type": "sh:NodeShape", + "sh:targetNode": "schema:PublicationEvent", + "sh:property": [ + { + "sh:path": [ + { + "sh:inversePath": [{ + "@id": "rdf:type" + }] + } + ], + "sh:minCount": 1 + } + ] + } + ] +} diff --git a/tests/fixtures/force_project_shacl.json b/tests/fixtures/force_project_shacl.json index a7a6989746..1ac5b928c4 100644 --- a/tests/fixtures/force_project_shacl.json +++ b/tests/fixtures/force_project_shacl.json @@ -2,41 +2,14 @@ "@context": { "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "sh": "http://www.w3.org/ns/shacl#", - "xsd": "http://www.w3.org/2001/XMLSchema#", - "schema": "http://schema.org/", - "foaf": "http://xmlns.com/foaf/0.1/", - "prov": "http://www.w3.org/ns/prov#", - "minCount": "sh:minCount", - "property": "sh:property", - "target": { - "@id": "sh:target", - "@type": "@id" - } + "schema": "http://schema.org/" }, "@graph": [ - { - "@id": "schema:", - "sh:declare": [ - { - "sh:prefix": [ - { - "@value": "schema" - } - ], - "sh:namespace": [ - { - "@value": "http://schema.org/", - "@type": "xsd:anyURI" - } - ] - } - ] - }, { "@id": "_:forceProjectShape", "@type": "sh:NodeShape", - "targetNode": "schema:Project", - "property": [ + "sh:targetNode": "schema:Project", + "sh:property": [ { "sh:path": [ { @@ -45,7 +18,7 @@ } } ], - "minCount": 1 + "sh:minCount": 1 } ] } From d1243f373d05202dcf659b66998a70a2a383a3a3 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Fri, 1 Nov 2019 17:14:12 +0100 Subject: [PATCH 05/24] Fixes SHACl validation and adds 
validation to renku log --- renku/cli/log.py | 10 +++- renku/core/commands/format/graph.py | 62 +++++++++++++++++++----- renku/core/models/provenance/entities.py | 4 +- renku/data/shacl_shape.json | 36 +++++++------- tests/cli/test_update.py | 6 +++ tests/core/models/test_shacl_schema.py | 1 - 6 files changed, 86 insertions(+), 33 deletions(-) diff --git a/renku/cli/log.py b/renku/cli/log.py index fa4fc83dfe..dc7edfb168 100644 --- a/renku/cli/log.py +++ b/renku/cli/log.py @@ -86,9 +86,15 @@ default=False, help='Display commands without output files.' ) +@click.option( + '--strict', + is_flag=True, + default=False, + help='Validate triples before output.' +) @click.argument('paths', type=click.Path(exists=True), nargs=-1) @pass_local_client -def log(client, revision, format, no_output, paths): +def log(client, revision, format, no_output, strict, paths): """Show logs for a file.""" graph = Graph(client) if not paths: @@ -108,4 +114,4 @@ def log(client, revision, format, no_output, paths): # NOTE shall we warn when "not no_output and not paths"? 
graph.build(paths=paths, revision=revision, can_be_cwl=no_output) - FORMATS[format](graph) + FORMATS[format](graph, strict=strict) diff --git a/renku/core/commands/format/graph.py b/renku/core/commands/format/graph.py index 0e8522fe30..8207228934 100644 --- a/renku/core/commands/format/graph.py +++ b/renku/core/commands/format/graph.py @@ -21,12 +21,17 @@ import click +from renku.core.utils.shacl import validate_graph -def ascii(graph): + +def ascii(graph, strict=False): """Format graph as an ASCII art.""" from ..ascii import DAG from ..echo import echo_via_pager + if strict: + raise click.BadParameter("--strict not supported for json-ld-graph") + echo_via_pager(str(DAG(graph))) @@ -56,12 +61,15 @@ def _conjunctive_graph(graph): ) -def dot(graph, simple=True, debug=False, landscape=False): +def dot(graph, simple=True, debug=False, landscape=False, strict=False): """Format graph as a dot file.""" import sys from rdflib.tools.rdf2dot import rdf2dot + if strict: + raise click.BadParameter("--strict not supported for json-ld-graph") + g = _conjunctive_graph(graph) g.bind('prov', 'http://www.w3.org/ns/prov#') @@ -299,10 +307,13 @@ def color(p): stream.write('}\n') -def makefile(graph): +def makefile(graph, strict=False): """Format graph as Makefile.""" from renku.core.models.provenance.activities import ProcessRun, WorkflowRun + if strict: + raise click.BadParameter("--strict not supported for json-ld-graph") + for activity in graph.activities.values(): if not isinstance(activity, ProcessRun): continue @@ -322,26 +333,53 @@ def makefile(graph): ) -def jsonld(graph): +def jsonld(graph, strict=False): """Format graph as JSON-LD file.""" - click.echo(_jsonld(graph, 'expand')) + ld = _jsonld(graph, 'expand') + + if strict: + r, _, t = validate_graph(ld, format='json-ld') + + if not r: + click.BadParameter( + "{}\nCouldn't get log: Invalid Knowledge Graph data".format(t) + ) + click.echo(ld) -def jsonld_graph(graph): +def jsonld_graph(graph, strict=False): """Format graph as 
JSON-LD graph file.""" + if strict: + raise click.BadParameter("--strict not supported for json-ld-graph") click.echo(_jsonld(graph, 'flatten')) -def nt(graph): +def nt(graph, strict=False): """Format graph as n-tuples.""" - click.echo(_conjunctive_graph(graph).serialize(format='nt')) + nt = _conjunctive_graph(graph).serialize(format='nt') + if strict: + r, _, t = validate_graph(nt, format='nt') + if not r: + click.BadParameter( + "{}\nCouldn't get log: Invalid Knowledge Graph data".format(t) + ) + + click.echo(nt) -def rdf(graph): + +def rdf(graph, strict=False): """Output the graph as RDF.""" - click.echo( - _conjunctive_graph(graph).serialize(format='application/rdf+xml') - ) + xml = _conjunctive_graph(graph).serialize(format='application/rdf+xml') + if strict: + r, _, t = validate_graph(xml, format='xml') + + if not r: + click.BadParameter( + "{}\nCouldn't get log: Invalid Knowledge Graph data".format(t) + ) + + click.echo() FORMATS = { diff --git a/renku/core/models/provenance/entities.py b/renku/core/models/provenance/entities.py index 4fdc74dddf..db5b506f75 100644 --- a/renku/core/models/provenance/entities.py +++ b/renku/core/models/provenance/entities.py @@ -62,7 +62,9 @@ def default_id(self): hexsha = self.commit.hexsha else: hexsha = 'UNCOMMITTED' - return 'blob/{hexsha}/{self.path}'.format(hexsha=hexsha, self=self) + return 'file://blob/{hexsha}/{self.path}'.format( + hexsha=hexsha, self=self + ) @_label.default def default_label(self): diff --git a/renku/data/shacl_shape.json b/renku/data/shacl_shape.json index f905b30bd2..03cf9d9168 100644 --- a/renku/data/shacl_shape.json +++ b/renku/data/shacl_shape.json @@ -18,6 +18,10 @@ "@id": "sh:ignoredProperties", "@container": "@list" }, + "or": { + "@id": "sh:or", + "@container": "@list" + }, "minCount": "sh:minCount", "maxCount": "sh:maxCount", "node": { @@ -42,6 +46,7 @@ "@type": "@id" } }, + "version": "0.7.1.dev7+dirty", "@graph": [ { "@id": "schema:", @@ -318,11 +323,6 @@ { "path": "schema:inLanguage", 
"node": "_:inLanguageShape" - }, - { - "path": "schema:isPartOf", - "node": "_:projectShape", - "minCount": 1 } ] }, @@ -384,15 +384,6 @@ "minCount": 1, "maxCount": 1 }, - { - "nodeKind": "sh:Literal", - "path": "schema:dateUpdated", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, { "nodeKind": "sh:Literal", "path": "schema:url", @@ -404,7 +395,7 @@ }, { "nodeKind": "sh:Literal", - "path": "schema:version", + "path": "prov:atLocation", "datatype": { "@id": "xsd:string" }, @@ -413,8 +404,19 @@ }, { "path": "schema:isPartOf", - "node": "_:projectShape", - "minCount": 1 + "or": [ + { + "sh:class": { + "@id": "schema:Project" + } + }, + { + "nodeKind": "sh:Literal", + "datatype": { + "@id": "xsd:string" + } + } + ] }, { "path": "schema:creator", diff --git a/tests/cli/test_update.py b/tests/cli/test_update.py index 59593dcc38..98309aea55 100644 --- a/tests/cli/test_update.py +++ b/tests/cli/test_update.py @@ -35,6 +35,8 @@ def update_and_commit(data, file_, repo): def test_update(runner, project, run): """Test automatic file update.""" + from renku.core.utils.shacl import validate_graph + cwd = Path(project) data = cwd / 'data' data.mkdir() @@ -94,6 +96,10 @@ def test_update(runner, project, run): assert 0 == result.exit_code, output_format assert source.name in result.output, output_format + if output_format == 'nt': + r, _, t = validate_graph(result.output) + assert r is True, t + def test_workflow_without_outputs(runner, project, run): """Test workflow without outputs.""" diff --git a/tests/core/models/test_shacl_schema.py b/tests/core/models/test_shacl_schema.py index fd8ab6c586..26163b9f94 100644 --- a/tests/core/models/test_shacl_schema.py +++ b/tests/core/models/test_shacl_schema.py @@ -86,7 +86,6 @@ def test_dataset_shacl(tmpdir, runner, project, client): def test_project_shacl(project, client): """Test project metadata structure.""" from renku.core.models.creators import Creator - from renku.core.models.projects import Project 
path = Path( __file__ From e3bc01738f38809de1cc87a1e1a9d133c4fabcce Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Tue, 5 Nov 2019 13:41:47 +0100 Subject: [PATCH 06/24] Adds renku log tests --- renku/core/commands/format/graph.py | 8 ++-- tests/cli/test_log.py | 66 +++++++++++++++++++++++++++++ tests/cli/test_migrate.py | 2 +- 3 files changed, 71 insertions(+), 5 deletions(-) create mode 100644 tests/cli/test_log.py diff --git a/renku/core/commands/format/graph.py b/renku/core/commands/format/graph.py index 8207228934..055baea94f 100644 --- a/renku/core/commands/format/graph.py +++ b/renku/core/commands/format/graph.py @@ -30,7 +30,7 @@ def ascii(graph, strict=False): from ..echo import echo_via_pager if strict: - raise click.BadParameter("--strict not supported for json-ld-graph") + raise click.BadParameter('--strict not supported for json-ld-graph') echo_via_pager(str(DAG(graph))) @@ -68,7 +68,7 @@ def dot(graph, simple=True, debug=False, landscape=False, strict=False): from rdflib.tools.rdf2dot import rdf2dot if strict: - raise click.BadParameter("--strict not supported for json-ld-graph") + raise click.BadParameter('--strict not supported for json-ld-graph') g = _conjunctive_graph(graph) @@ -312,7 +312,7 @@ def makefile(graph, strict=False): from renku.core.models.provenance.activities import ProcessRun, WorkflowRun if strict: - raise click.BadParameter("--strict not supported for json-ld-graph") + raise click.BadParameter('--strict not supported for json-ld-graph') for activity in graph.activities.values(): if not isinstance(activity, ProcessRun): @@ -350,7 +350,7 @@ def jsonld(graph, strict=False): def jsonld_graph(graph, strict=False): """Format graph as JSON-LD graph file.""" if strict: - raise click.BadParameter("--strict not supported for json-ld-graph") + raise click.BadParameter('--strict not supported for json-ld-graph') click.echo(_jsonld(graph, 'flatten')) diff --git a/tests/cli/test_log.py b/tests/cli/test_log.py new file mode 100644 index 
0000000000..140cfb9ead --- /dev/null +++ b/tests/cli/test_log.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2017-2019 - Swiss Data Science Center (SDSC) +# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and +# Eidgenössische Technische Hochschule Zürich (ETHZ). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test ``log`` command.""" + +from __future__ import absolute_import, print_function + +import pytest + +from renku.cli import cli + + +@pytest.mark.shelled +@pytest.mark.parametrize('format', ['json-ld', 'nt', 'rdf']) +def test_run_log_strict(runner, project, run_shell, format): + """Test log output of run command.""" + # Run a shell command with pipe. + result = run_shell('renku run echo "a" > output') + + # Assert created output file. 
+ result = runner.invoke( + cli, ['log', '--strict', '--format={}'.format(format)] + ) + assert 0 == result.exit_code, result.output + assert '.renku/workflow/' in result.output + + +@pytest.mark.shelled +@pytest.mark.parametrize('format', ['json-ld', 'nt', 'rdf']) +def test_dataset_log_strict(tmpdir, runner, project, client, format): + """Test output of log for dataset add.""" + result = runner.invoke(cli, ['dataset', 'create', 'my-dataset']) + assert 0 == result.exit_code + + paths = [] + for i in range(3): + new_file = tmpdir.join('file_{0}'.format(i)) + new_file.write(str(i)) + paths.append(str(new_file)) + + # add data + result = runner.invoke( + cli, + ['dataset', 'add', 'my-dataset'] + paths, + ) + assert 0 == result.exit_code + + result = runner.invoke( + cli, ['log', '--strict', '--format={}'.format(format)] + ) + assert 0 == result.exit_code, result.output + assert all(p in result.output for p in paths) diff --git a/tests/cli/test_migrate.py b/tests/cli/test_migrate.py index 59c597bdca..70306de76d 100644 --- a/tests/cli/test_migrate.py +++ b/tests/cli/test_migrate.py @@ -146,7 +146,7 @@ def test_graph_building_after_migration(isolated_runner, old_project): result = isolated_runner.invoke(cli, ['migrate', 'datasets']) assert 0 == result.exit_code - result = isolated_runner.invoke(cli, ['log']) + result = isolated_runner.invoke(cli, ['log', '--strict']) assert 0 == result.exit_code From 52c5bcc3edc7e375053a74057c702082bf94f69e Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Tue, 5 Nov 2019 16:49:09 +0100 Subject: [PATCH 07/24] Adds some provenance types to the SHACL graph --- renku/data/shacl_shape.json | 1149 ++++++++++++++++++++--------------- 1 file changed, 663 insertions(+), 486 deletions(-) diff --git a/renku/data/shacl_shape.json b/renku/data/shacl_shape.json index 03cf9d9168..3148da80dc 100644 --- a/renku/data/shacl_shape.json +++ b/renku/data/shacl_shape.json @@ -1,487 +1,664 @@ { - "@context": { - "rdf": 
"http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "sh": "http://www.w3.org/ns/shacl#", - "xsd": "http://www.w3.org/2001/XMLSchema#", - "schema": "http://schema.org/", - "foaf": "http://xmlns.com/foaf/0.1/", - "prov": "http://www.w3.org/ns/prov#", - "closed": { - "@id": "sh:closed", - "@type": "http://www.w3.org/2001/XMLSchema#boolean" - }, - "datatype": { - "@id": "sh:datatype", - "@type": "@id" - }, - "ignoredProperties": { - "@id": "sh:ignoredProperties", - "@container": "@list" - }, - "or": { - "@id": "sh:or", - "@container": "@list" - }, - "minCount": "sh:minCount", - "maxCount": "sh:maxCount", - "node": { - "@id": "sh:node", - "@type": "@id" - }, - "nodeKind": { - "@id": "sh:nodeKind", - "@type": "@id" - }, - "property": "sh:property", - "path": { - "@id": "sh:path", - "@type": "@id" - }, - "targetClass": { - "@id": "sh:targetClass", - "@type": "@id" - }, - "target": { - "@id": "sh:target", - "@type": "@id" - } - }, - "version": "0.7.1.dev7+dirty", - "@graph": [ - { - "@id": "schema:", - "sh:declare": [ - { - "sh:prefix": [ - { - "@value": "schema" - } - ], - "sh:namespace": [ - { - "@value": "http://schema.org/", - "@type": "xsd:anyURI" - } - ] - } - ] - }, - { - "@id": "_:oldProjecShape", - "@type": "sh:NodeShape", - "targetClass": "foaf:Project", - "property": [ - { - "nodeKind": "sh:Literal", - "path": "ex:CheckOldProjectMetadata", - "minCount": 99999, - "maxCount": 99999, - "sh:message": "Project should be schema:Project, not foaf:Project" - } - ] - }, - { - "@id": "_:projectShape", - "@type": "sh:NodeShape", - "ignoredProperties": [ - { - "@id": "rdf:type" - } - ], - "closed": true, - "targetClass": "schema:Project", - "property": [ - { - "nodeKind": "sh:Literal", - "path": "schema:dateCreated", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1, - "sh:lessThanOrEquals": { - "@id": "schema:dateUpdated" - } - }, - { - "nodeKind": "sh:Literal", - "path": "schema:dateUpdated", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 
1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:schemaVersion", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:name", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "path": "schema:creator", - "node": "_:creatorShape", - "minCount": 1 - } - ] - }, - { - "@id": "_:creatorShape", - "@type": "sh:NodeShape", - "ignoredProperties": [ - { - "@id": "rdf:type" - } - ], - "closed": true, - "targetClass": "schema:Person", - "property": [ - { - "nodeKind": "sh:Literal", - "path": "schema:name", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:email", - "datatype": { - "@id": "xsd:string" - }, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:alternateName", - "datatype": { - "@id": "xsd:string" - } - }, - { - "nodeKind": "sh:Literal", - "path": "schema:affiliation", - "datatype": { - "@id": "xsd:string" - } - } - ] - }, - { - "@id": "_:datasetShape", - "@type": "sh:NodeShape", - "ignoredProperties": [ - { - "@id": "rdf:type" - }, - { - "@id": "schema:license" - } - ], - "closed": true, - "target": [ - { - "@type": "sh:SPARQLTarget", - "sh:prefixes": [ - { - "@id": "schema:" - } - ], - "sh:select": [ - { - "@value": "SELECT ?this\nWHERE {\n ?this a schema:Dataset .\n MINUS { ?x schema:license ?this .}\n}\n" - } - ] - } - ], - "property": [ - { - "nodeKind": "sh:Literal", - "path": "schema:isBasedOn", - "datatype": { - "@id": "xsd:string" - }, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:dateCreated", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1, - "sh:lessThanOrEquals": { - "@id": "schema:datePublished" - } - }, - { - "path": "schema:creator", - "node": "_:creatorShape", - "minCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:datePublished", - "datatype": { 
- "@id": "xsd:string" - }, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:description", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:identifier", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:keywords", - "datatype": { - "@id": "xsd:string" - } - }, - { - "nodeKind": "sh:Literal", - "path": "schema:name", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "prov:atLocation", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:sameAs", - "datatype": { - "@id": "xsd:string" - } - }, - { - "nodeKind": "sh:Literal", - "path": "schema:url", - "datatype": { - "@id": "xsd:string" - } - }, - { - "nodeKind": "sh:Literal", - "path": "schema:version", - "datatype": { - "@id": "xsd:string" - } - }, - { - "path": "schema:isPartOf", - "node": "_:projectShape", - "minCount": 1, - "maxCount": 1 - }, - { - "path": "schema:subjectOf", - "node": "_:datasetTagShape" - }, - { - "path": "schema:hasPart", - "node": "_:datasetFileShape" - }, - { - "path": "schema:inLanguage", - "node": "_:inLanguageShape" - } - ] - }, - { - "@id": "_:inLanguageShape", - "@type": "sh:NodeShape", - "ignoredProperties": [ - { - "@id": "rdf:type" - } - ], - "closed": true, - "targetClass": "schema:Language", - "property": [ - { - "nodeKind": "sh:Literal", - "path": "schema:name", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:alternateName", - "datatype": { - "@id": "xsd:string" - } - } - ] - }, - { - "@id": "_:datasetFileShape", - "@type": "sh:NodeShape", - "ignoredProperties": [ - { - "@id": "rdf:type" - } - ], - "closed": true, - "targetClass": "schema:DigitalDocument", - "property": [ 
- { - "nodeKind": "sh:Literal", - "path": "schema:name", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:dateCreated", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:url", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "prov:atLocation", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "path": "schema:isPartOf", - "or": [ - { - "sh:class": { - "@id": "schema:Project" - } - }, - { - "nodeKind": "sh:Literal", - "datatype": { - "@id": "xsd:string" - } - } - ] - }, - { - "path": "schema:creator", - "node": "_:creatorShape", - "minCount": 1 - } - ] - }, - { - "@id": "_:datasetTagShape", - "@type": "sh:NodeShape", - "ignoredProperties": [ - { - "@id": "rdf:type" - } - ], - "closed": true, - "targetClass": "schema:PublicationEvent", - "property": [ - { - "nodeKind": "sh:Literal", - "path": "schema:name", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:description", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:startDate", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:location", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - }, - { - "nodeKind": "sh:Literal", - "path": "schema:about", - "datatype": { - "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 - } - ] - } - ] - } + "@context": { + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "sh": "http://www.w3.org/ns/shacl#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "schema": "http://schema.org/", + 
"foaf": "http://xmlns.com/foaf/0.1/", + "prov": "http://www.w3.org/ns/prov#", + "closed": { + "@id": "sh:closed", + "@type": "http://www.w3.org/2001/XMLSchema#boolean" + }, + "datatype": { + "@id": "sh:datatype", + "@type": "@id" + }, + "ignoredProperties": { + "@id": "sh:ignoredProperties", + "@container": "@list" + }, + "or": { + "@id": "sh:or", + "@container": "@list" + }, + "minCount": "sh:minCount", + "maxCount": "sh:maxCount", + "node": { + "@id": "sh:node", + "@type": "@id" + }, + "nodeKind": { + "@id": "sh:nodeKind", + "@type": "@id" + }, + "property": "sh:property", + "path": { + "@id": "sh:path", + "@type": "@id" + }, + "targetClass": { + "@id": "sh:targetClass", + "@type": "@id" + }, + "target": { + "@id": "sh:target", + "@type": "@id" + } + }, + "version": "0.7.1.dev7+dirty", + "@graph": [ + { + "@id": "schema:", + "sh:declare": [ + { + "sh:prefix": [ + { + "@value": "schema" + } + ], + "sh:namespace": [ + { + "@value": "http://schema.org/", + "@type": "xsd:anyURI" + } + ] + } + ] + }, + { + "@id": "prov:", + "sh:declare": [ + { + "sh:prefix": [ + { + "@value": "prov" + } + ], + "sh:namespace": [ + { + "@value": "http://www.w3.org/ns/prov#", + "@type": "xsd:anyURI" + } + ] + } + ] + }, + { + "@id": "_:oldProjecShape", + "@type": "sh:NodeShape", + "targetClass": "foaf:Project", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "ex:CheckOldProjectMetadata", + "minCount": 99999, + "maxCount": 99999, + "sh:message": "Project should be schema:Project, not foaf:Project" + } + ] + }, + { + "@id": "_:projectShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "schema:Project", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:dateCreated", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1, + "sh:lessThanOrEquals": { + "@id": "schema:dateUpdated" + } + }, + { + "nodeKind": "sh:Literal", + "path": "schema:dateUpdated", + "datatype": { + 
"@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:schemaVersion", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "path": "schema:creator", + "node": "_:creatorShape", + "minCount": 1 + } + ] + }, + { + "@id": "_:creatorShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "target": [ + { + "@type": "sh:SPARQLTarget", + "sh:prefixes": [ + { + "@id": "schema:" + }, + { + "@id": "prov:" + } + ], + "sh:select": [ + { + "@value": "SELECT ?this\nWHERE {\n ?this a schema:Person .\n MINUS { ?this a prov:Person . }\n}\n" + } + ] + } + ], + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:email", + "datatype": { + "@id": "xsd:string" + }, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:alternateName", + "datatype": { + "@id": "xsd:string" + } + }, + { + "nodeKind": "sh:Literal", + "path": "schema:affiliation", + "datatype": { + "@id": "xsd:string" + } + } + ] + }, + { + "@id": "_:datasetShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + }, + { + "@id": "schema:license" + } + ], + "closed": true, + "target": [ + { + "@type": "sh:SPARQLTarget", + "sh:prefixes": [ + { + "@id": "schema:" + } + ], + "sh:select": [ + { + "@value": "SELECT ?this\nWHERE {\n ?this a schema:Dataset .\n MINUS { ?x schema:license ?this .}\n}\n" + } + ] + } + ], + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:isBasedOn", + "datatype": { + "@id": "xsd:string" + }, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:dateCreated", + "datatype": { + "@id": "xsd:string" + 
}, + "minCount": 1, + "maxCount": 1, + "sh:lessThanOrEquals": { + "@id": "schema:datePublished" + } + }, + { + "path": "schema:creator", + "node": "_:creatorShape", + "minCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:datePublished", + "datatype": { + "@id": "xsd:string" + }, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:description", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:identifier", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:keywords", + "datatype": { + "@id": "xsd:string" + } + }, + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "prov:atLocation", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:sameAs", + "datatype": { + "@id": "xsd:string" + } + }, + { + "nodeKind": "sh:Literal", + "path": "schema:url", + "datatype": { + "@id": "xsd:string" + } + }, + { + "nodeKind": "sh:Literal", + "path": "schema:version", + "datatype": { + "@id": "xsd:string" + } + }, + { + "path": "schema:isPartOf", + "node": "_:projectShape", + "minCount": 1, + "maxCount": 1 + }, + { + "path": "schema:subjectOf", + "node": "_:datasetTagShape" + }, + { + "path": "schema:hasPart", + "node": "_:datasetFileShape" + }, + { + "path": "schema:inLanguage", + "node": "_:inLanguageShape" + } + ] + }, + { + "@id": "_:inLanguageShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "schema:Language", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": 
"schema:alternateName", + "datatype": { + "@id": "xsd:string" + } + } + ] + }, + { + "@id": "_:datasetFileShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "schema:DigitalDocument", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:dateCreated", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:url", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "prov:atLocation", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "path": "schema:isPartOf", + "or": [ + { + "sh:class": { + "@id": "schema:Project" + } + }, + { + "nodeKind": "sh:Literal", + "datatype": { + "@id": "xsd:string" + } + } + ] + }, + { + "path": "schema:creator", + "node": "_:creatorShape", + "minCount": 1 + } + ] + }, + { + "@id": "_:datasetTagShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "schema:PublicationEvent", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "schema:name", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:description", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:startDate", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:location", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "schema:about", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + 
"maxCount": 1 + } + ] + }, + { + "@id": "_:activityShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "prov:Activity", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "rdfs:comment", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "prov:wasInformedBy", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "sh:class": { + "@id": "prov:Association" + }, + "path": { + "sh:inversePath": { + "@id": "prov:activity" + } + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "prov:influenced", + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "prov:startedAtTime", + "datatype": { + "@id": "xsd:dateTime" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "prov:endedAtTime", + "datatype": { + "@id": "xsd:dateTime" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "path": "prov:agent", + "or": [ + { + "node": "_:softwareAgentShape" + }, + { + "node": "_:creatorShape" + } + ], + "minCount": 2, + "maxCount": 2 + } + ] + }, + { + "@id": "_:softwareAgentShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "prov:SoftwareAgent", + "property": [ + { + "nodeKind": "sh:Literal", + "path": "rdfs:label", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "prov:wasStartedBy", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + } + ] + }, + { + "@id": "_:generationShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "prov:Generation", + "property": [ + { + "path": { + "sh:inversePath": { + "@id": "prov:qualifiedGeneration" + } + }, + "nodeKind": 
"sh:BlankNodeOrIRI" + }, + { + "nodeKind": "sh:Literal", + "path": "prov:hadRole", + "datatype": { + "@id": "xsd:string" + }, + "minCount": 1, + "maxCount": 1 + } + ] + } + ] +} From 39b2e9a3d39f529524e3b80af714a3fd6bd470e8 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 7 Nov 2019 16:20:52 +0100 Subject: [PATCH 08/24] fixes rdf log output and pyshacl dependency --- renku/core/commands/format/graph.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/renku/core/commands/format/graph.py b/renku/core/commands/format/graph.py index 055baea94f..ea6d60bc15 100644 --- a/renku/core/commands/format/graph.py +++ b/renku/core/commands/format/graph.py @@ -379,7 +379,7 @@ def rdf(graph, strict=False): "{}\nCouldn't get log: Invalid Knowledge Graph data".format(t) ) - click.echo() + click.echo(xml) FORMATS = { diff --git a/setup.py b/setup.py index 08268cee01..ae0d86b42e 100644 --- a/setup.py +++ b/setup.py @@ -87,7 +87,7 @@ 'PyYAML>=3.12', 'pyld>=1.0.3', 'pyOpenSSL>=19.0.0', - 'pyshacl>=0.11.3', + 'pyshacl>=0.11.3.post1', 'python-dateutil>=2.6.1', 'python-editor>=1.0.4', 'rdflib-jsonld>=0.4.0', From b9535a0847baf45b2115409895589d8202bbb898 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 7 Nov 2019 17:21:26 +0100 Subject: [PATCH 09/24] Fixes Project and Dataset SHACL tests --- renku/core/models/datasets.py | 2 +- tests/fixtures/force_dataset_shacl.json | 20 +++++--- tests/fixtures/force_datasetfile_shacl.json | 54 ++++++++++++--------- tests/fixtures/force_datasettag_shacl.json | 54 ++++++++++++--------- tests/fixtures/force_project_shacl.json | 22 ++++++--- 5 files changed, 88 insertions(+), 64 deletions(-) diff --git a/renku/core/models/datasets.py b/renku/core/models/datasets.py index 5345577ea9..d5394f3fc3 100644 --- a/renku/core/models/datasets.py +++ b/renku/core/models/datasets.py @@ -100,7 +100,7 @@ def _now(self): @_id.default def default_id(self): """Define default value for id field.""" - return 
'{0}@{1}'.format(self.name, self.commit) + return '_:{0}@{1}'.format(self.name, self.commit) @jsonld.s( diff --git a/tests/fixtures/force_dataset_shacl.json b/tests/fixtures/force_dataset_shacl.json index 8dd3078843..3c201f0869 100644 --- a/tests/fixtures/force_dataset_shacl.json +++ b/tests/fixtures/force_dataset_shacl.json @@ -1,21 +1,27 @@ { "@context": { - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "sh": "http://www.w3.org/ns/shacl#", - "schema": "http://schema.org/" + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "sh": "http://www.w3.org/ns/shacl#", + "schema": "http://schema.org/" }, "@graph": [ { "@id": "_:forceDatasetShape", "@type": "sh:NodeShape", - "sh:targetNode": "schema:Dataset", + "sh:targetNode": { + "@id": "schema:Dataset", + "@type": "@id" + }, "sh:property": [ { "sh:path": [ { - "sh:inversePath": { - "@id": "rdf:type" - } + "sh:inversePath": [ + { + "@id": "rdf:type", + "@type": "@id" + } + ] } ], "sh:minCount": 1 diff --git a/tests/fixtures/force_datasetfile_shacl.json b/tests/fixtures/force_datasetfile_shacl.json index 34e4993dd5..45470e3740 100644 --- a/tests/fixtures/force_datasetfile_shacl.json +++ b/tests/fixtures/force_datasetfile_shacl.json @@ -1,26 +1,32 @@ { - "@context": { - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "sh": "http://www.w3.org/ns/shacl#", - "schema": "http://schema.org/" - }, - "@graph": [ - { - "@id": "_:forceDatasetShape", - "@type": "sh:NodeShape", - "sh:targetNode": "schema:DigitalDocument", - "sh:property": [ - { - "sh:path": [ - { - "sh:inversePath": [{ - "@id": "rdf:type" - }] - } - ], - "sh:minCount": 1 - } - ] - } - ] + "@context": { + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "sh": "http://www.w3.org/ns/shacl#", + "schema": "http://schema.org/" + }, + "@graph": [ + { + "@id": "_:forceDatasetShape", + "@type": "sh:NodeShape", + "sh:targetNode": { + "@id": "schema:DigitalDocument", + "@type": "@id" + }, + "sh:property": [ + { + "sh:path": [ + { + "sh:inversePath": 
[ + { + "@id": "rdf:type", + "@type": "@id" + } + ] + } + ], + "sh:minCount": 1 + } + ] + } + ] } diff --git a/tests/fixtures/force_datasettag_shacl.json b/tests/fixtures/force_datasettag_shacl.json index e5987723f0..106f5e0e41 100644 --- a/tests/fixtures/force_datasettag_shacl.json +++ b/tests/fixtures/force_datasettag_shacl.json @@ -1,26 +1,32 @@ { - "@context": { - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "sh": "http://www.w3.org/ns/shacl#", - "schema": "http://schema.org/" - }, - "@graph": [ - { - "@id": "_:forceDatasetShape", - "@type": "sh:NodeShape", - "sh:targetNode": "schema:PublicationEvent", - "sh:property": [ - { - "sh:path": [ - { - "sh:inversePath": [{ - "@id": "rdf:type" - }] - } - ], - "sh:minCount": 1 - } - ] - } - ] + "@context": { + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "sh": "http://www.w3.org/ns/shacl#", + "schema": "http://schema.org/" + }, + "@graph": [ + { + "@id": "_:forceDatasetShape", + "@type": "sh:NodeShape", + "sh:targetNode": { + "@id": "schema:PublicationEvent", + "@type": "@id" + }, + "sh:property": [ + { + "sh:path": [ + { + "sh:inversePath": [ + { + "@id": "rdf:type", + "@type": "@id" + } + ] + } + ], + "sh:minCount": 1 + } + ] + } + ] } diff --git a/tests/fixtures/force_project_shacl.json b/tests/fixtures/force_project_shacl.json index 1ac5b928c4..b7fd526983 100644 --- a/tests/fixtures/force_project_shacl.json +++ b/tests/fixtures/force_project_shacl.json @@ -1,21 +1,27 @@ { "@context": { - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "sh": "http://www.w3.org/ns/shacl#", - "schema": "http://schema.org/" + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "sh": "http://www.w3.org/ns/shacl#", + "schema": "http://schema.org/" }, "@graph": [ { "@id": "_:forceProjectShape", "@type": "sh:NodeShape", - "sh:targetNode": "schema:Project", - "sh:property": [ + "sh:targetNode": { + "@id": "schema:Project", + "@type": "@id" + }, + "sh:property": [ { "sh:path": [ { - "sh:inversePath": { - 
"@id": "rdf:type" - } + "sh:inversePath": [ + { + "@id": "rdf:type", + "@type": "@id" + } + ] } ], "sh:minCount": 1 From 5339b6de2f6fdf4da78a0e9381d181bb7760970a Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Fri, 8 Nov 2019 13:30:12 +0100 Subject: [PATCH 10/24] Adds workflow SHACl validation --- renku/core/commands/format/graph.py | 2 +- renku/core/models/provenance/activities.py | 13 ++- renku/data/shacl_shape.json | 110 ++++++++++++++++----- tests/cli/test_migrate.py | 2 +- tests/cli/test_update.py | 4 +- 5 files changed, 101 insertions(+), 30 deletions(-) diff --git a/renku/core/commands/format/graph.py b/renku/core/commands/format/graph.py index ea6d60bc15..d7c97fca84 100644 --- a/renku/core/commands/format/graph.py +++ b/renku/core/commands/format/graph.py @@ -106,7 +106,7 @@ def _rdf2dot_simple(g, stream): import re path_re = re.compile( - r'file:///(?P[a-zA-Z]+)/' + r'(?Pfile://|https://\w+/\w+/){0,1}(?P[a-zA-Z]+)/' r'(?P\w+)' r'(?P.+)?' ) diff --git a/renku/core/models/provenance/activities.py b/renku/core/models/provenance/activities.py index 6b9b073796..e6b5b782fe 100644 --- a/renku/core/models/provenance/activities.py +++ b/renku/core/models/provenance/activities.py @@ -18,9 +18,10 @@ """Represent a Git commit.""" import os +import urllib import uuid from collections import OrderedDict -from pathlib import Path +from pathlib import Path, posixpath import attr from git import NULL_TREE @@ -212,7 +213,15 @@ def paths(self): @classmethod def generate_id(cls, commit): """Calculate action ID.""" - return 'commit/{commit.hexsha}'.format(commit=commit) + host = os.environ.get('RENKU_DOMAIN') or 'localhost' + + # always set the id by the identifier + return urllib.parse.urljoin( + 'https://{host}'.format(host=host), + posixpath.join( + '/activities', 'commit/{commit.hexsha}'.format(commit=commit) + ) + ) @_id.default def default_id(self): diff --git a/renku/data/shacl_shape.json b/renku/data/shacl_shape.json index 3148da80dc..e12ec04cd7 100644 --- 
a/renku/data/shacl_shape.json +++ b/renku/data/shacl_shape.json @@ -7,6 +7,7 @@ "schema": "http://schema.org/", "foaf": "http://xmlns.com/foaf/0.1/", "prov": "http://www.w3.org/ns/prov#", + "wfprov": "http://purl.org/wf4ever/wfprov#", "closed": { "@id": "sh:closed", "@type": "http://www.w3.org/2001/XMLSchema#boolean" @@ -541,30 +542,20 @@ }, { "nodeKind": "sh:Literal", - "path": "prov:wasInformedBy", + "path": "rdfs:label", "datatype": { "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 + } }, { - "sh:class": { - "@id": "prov:Association" - }, - "path": { - "sh:inversePath": { - "@id": "prov:activity" - } - }, + "nodeKind": "sh:IRI", + "path": "prov:wasInformedBy", "minCount": 1, "maxCount": 1 }, { "nodeKind": "sh:Literal", - "path": "prov:influenced", - "minCount": 1, - "maxCount": 1 + "path": "prov:influenced" }, { "nodeKind": "sh:Literal", @@ -592,10 +583,81 @@ }, { "node": "_:creatorShape" + }, + { + "nodeKind": "sh:IRI" } ], "minCount": 2, "maxCount": 2 + }, + { + "nodeKind": "sh:Literal", + "path": "prov:atLocation", + "datatype": { + "@id": "xsd:string" + } + }, + { + "path": "prov:qualifiedUsage", + "node":"_:usageShape" + }, + { + "path": "prov:qualifiedAssociation", + "node":"_:associationShape" + }, + { + "path": "wfprov:wasPartOfWorkflowRun", + "sh:class": { + "@id": "wfprov:WorkflowRun" + } + } + ] + }, + { + "@id": "_:associationShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "prov:Association", + "property": [ + { + "path": "prov:hadPlan", + "minCount": 1 + }, + { + "path": "prov:agent", + "node": "_:softwareAgentShape", + "minCount": 1, + "maxCount": 1 + } + ] + }, + { + "@id": "_:usageShape", + "@type": "sh:NodeShape", + "ignoredProperties": [ + { + "@id": "rdf:type" + } + ], + "closed": true, + "targetClass": "prov:Usage", + "property": [ + { + "path": "prov:entity", + "minCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "prov:hadRole", + "datatype": { 
+ "@id": "xsd:string" + } } ] }, @@ -615,16 +677,11 @@ "path": "rdfs:label", "datatype": { "@id": "xsd:string" - }, - "minCount": 1, - "maxCount": 1 + } }, { - "nodeKind": "sh:Literal", + "nodeKind": "sh:IRI", "path": "prov:wasStartedBy", - "datatype": { - "@id": "xsd:string" - }, "minCount": 1, "maxCount": 1 } @@ -654,9 +711,14 @@ "path": "prov:hadRole", "datatype": { "@id": "xsd:string" + } + }, + { + "sh:class": { + "@id": "prov:Activity" }, - "minCount": 1, - "maxCount": 1 + "path": "prov:activity", + "minCount": 1 } ] } diff --git a/tests/cli/test_migrate.py b/tests/cli/test_migrate.py index 70306de76d..59c597bdca 100644 --- a/tests/cli/test_migrate.py +++ b/tests/cli/test_migrate.py @@ -146,7 +146,7 @@ def test_graph_building_after_migration(isolated_runner, old_project): result = isolated_runner.invoke(cli, ['migrate', 'datasets']) assert 0 == result.exit_code - result = isolated_runner.invoke(cli, ['log', '--strict']) + result = isolated_runner.invoke(cli, ['log']) assert 0 == result.exit_code diff --git a/tests/cli/test_update.py b/tests/cli/test_update.py index 98309aea55..b0f5080b3c 100644 --- a/tests/cli/test_update.py +++ b/tests/cli/test_update.py @@ -93,11 +93,11 @@ def test_update(runner, project, run): ['log', '--format', output_format], catch_exceptions=False, ) - assert 0 == result.exit_code, output_format + assert 0 == result.exit_code, result.output assert source.name in result.output, output_format if output_format == 'nt': - r, _, t = validate_graph(result.output) + r, _, t = validate_graph(result.output, format='nt') assert r is True, t From 5624fddd8438ad8c8daa22bebed9e5e21e5dfaeb Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Fri, 8 Nov 2019 15:20:55 +0100 Subject: [PATCH 11/24] Fix SHACL schema and renku log validation --- renku/core/commands/format/graph.py | 6 +-- renku/core/utils/shacl.py | 2 - renku/data/shacl_shape.json | 83 ++++++++++++++++++++++------- 3 files changed, 68 insertions(+), 23 deletions(-) diff --git 
a/renku/core/commands/format/graph.py b/renku/core/commands/format/graph.py index d7c97fca84..5940a671a5 100644 --- a/renku/core/commands/format/graph.py +++ b/renku/core/commands/format/graph.py @@ -341,7 +341,7 @@ def jsonld(graph, strict=False): r, _, t = validate_graph(ld, format='json-ld') if not r: - click.BadParameter( + raise click.BadParameter( "{}\nCouldn't get log: Invalid Knowledge Graph data".format(t) ) click.echo(ld) @@ -361,7 +361,7 @@ def nt(graph, strict=False): r, _, t = validate_graph(nt, format='nt') if not r: - click.BadParameter( + raise click.BadParameter( "{}\nCouldn't get log: Invalid Knowledge Graph data".format(t) ) @@ -375,7 +375,7 @@ def rdf(graph, strict=False): r, _, t = validate_graph(xml, format='xml') if not r: - click.BadParameter( + raise click.BadParameter( "{}\nCouldn't get log: Invalid Knowledge Graph data".format(t) ) diff --git a/renku/core/utils/shacl.py b/renku/core/utils/shacl.py index b4acfb1212..4d406bf281 100644 --- a/renku/core/utils/shacl.py +++ b/renku/core/utils/shacl.py @@ -17,8 +17,6 @@ # limitations under the License. 
"""JSON-LD SHACL validation.""" -import json - from pkg_resources import resource_string from pyshacl import validate diff --git a/renku/data/shacl_shape.json b/renku/data/shacl_shape.json index e12ec04cd7..e0939f22bc 100644 --- a/renku/data/shacl_shape.json +++ b/renku/data/shacl_shape.json @@ -26,10 +26,6 @@ }, "minCount": "sh:minCount", "maxCount": "sh:maxCount", - "node": { - "@id": "sh:node", - "@type": "@id" - }, "nodeKind": { "@id": "sh:nodeKind", "@type": "@id" @@ -152,7 +148,9 @@ }, { "path": "schema:creator", - "node": "_:creatorShape", + "sh:class":{ + "@id": "schema:Person" + }, "minCount": 1 } ] @@ -268,7 +266,9 @@ }, { "path": "schema:creator", - "node": "_:creatorShape", + "sh:class": { + "@id": "schema:Person" + }, "minCount": 1 }, { @@ -345,21 +345,42 @@ }, { "path": "schema:isPartOf", - "node": "_:projectShape", + "sh:class": { + "@id": "schema:Project" + }, "minCount": 1, "maxCount": 1 }, { "path": "schema:subjectOf", - "node": "_:datasetTagShape" + "sh:class": { + "@id": "schema:PublicationEvent" + } }, { "path": "schema:hasPart", - "node": "_:datasetFileShape" + "sh:class": { + "@id": "schema:DigitalDocument" + } }, { "path": "schema:inLanguage", - "node": "_:inLanguageShape" + "sh:class": { + "@id": "schema:Language" + } + }, + { + "nodeKind": "sh:Literal", + "path": "rdfs:label", + "datatype": { + "@id": "xsd:string" + } + }, + { + "path": "prov:qualifiedGeneration", + "sh:class": { + "@id": "prov:Generation" + } } ] }, @@ -457,8 +478,17 @@ }, { "path": "schema:creator", - "node": "_:creatorShape", + "sh:class": { + "@id": "schema:Person" + }, "minCount": 1 + }, + { + "nodeKind": "sh:Literal", + "path": "rdfs:label", + "datatype": { + "@id": "xsd:string" + } } ] }, @@ -579,10 +609,14 @@ "path": "prov:agent", "or": [ { - "node": "_:softwareAgentShape" + "sh:class": { + "@id": "prov:SoftwareAgent" + } }, { - "node": "_:creatorShape" + "sh:class": { + "@id": "schema:Person" + } }, { "nodeKind": "sh:IRI" @@ -600,11 +634,15 @@ }, { "path": 
"prov:qualifiedUsage", - "node":"_:usageShape" + "sh:class": { + "@id": "prov:Usage" + } }, { "path": "prov:qualifiedAssociation", - "node":"_:associationShape" + "sh:class": { + "@id": "prov:Association" + } }, { "path": "wfprov:wasPartOfWorkflowRun", @@ -631,7 +669,9 @@ }, { "path": "prov:agent", - "node": "_:softwareAgentShape", + "sh:class": { + "@id": "prov:SoftwareAgent" + }, "minCount": 1, "maxCount": 1 } @@ -680,9 +720,16 @@ } }, { - "nodeKind": "sh:IRI", "path": "prov:wasStartedBy", - "minCount": 1, + "or": [ + { + "nodeKind": "sh:IRI" + }, + { + "sh:class": { + "@id": "prov:Person" + } + }], "maxCount": 1 } ] From f0f20d4b3b7510af676610b1703158b9886ab0c3 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Fri, 8 Nov 2019 15:38:11 +0100 Subject: [PATCH 12/24] Fix memory leak in integration tests --- tests/cli/test_integration_datasets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/cli/test_integration_datasets.py b/tests/cli/test_integration_datasets.py index d00184ca17..188234583e 100644 --- a/tests/cli/test_integration_datasets.py +++ b/tests/cli/test_integration_datasets.py @@ -352,7 +352,9 @@ def test_dataset_export_upload_multiple( @pytest.mark.integration -def test_dataset_export_upload_failure(runner, project, tmpdir, client): +def test_dataset_export_upload_failure( + runner, project, tmpdir, client, zenodo_sandbox +): """Test failed uploading of a file to Zenodo deposit.""" result = runner.invoke(cli, ['dataset', 'create', 'my-dataset']) From c827f3a93c8a3cd3867641da69273f6082149d69 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Tue, 12 Nov 2019 09:03:31 +0100 Subject: [PATCH 13/24] Fixes Python 3.5 path --- tests/core/models/test_shacl_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core/models/test_shacl_schema.py b/tests/core/models/test_shacl_schema.py index 26163b9f94..048fafb90d 100644 --- a/tests/core/models/test_shacl_schema.py +++ 
b/tests/core/models/test_shacl_schema.py @@ -17,7 +17,7 @@ # limitations under the License. """test KG against SHACL shape.""" -from pathlib import Path +from renku.core.compat import Path from pyld import jsonld as ld From a760d2fbbdfc6e9cc2bf042829414088d36eef72 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Tue, 12 Nov 2019 09:32:14 +0100 Subject: [PATCH 14/24] Fixes sort order --- tests/core/models/test_shacl_schema.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/core/models/test_shacl_schema.py b/tests/core/models/test_shacl_schema.py index 048fafb90d..49d36ad02d 100644 --- a/tests/core/models/test_shacl_schema.py +++ b/tests/core/models/test_shacl_schema.py @@ -17,11 +17,10 @@ # limitations under the License. """test KG against SHACL shape.""" -from renku.core.compat import Path - from pyld import jsonld as ld from renku.cli import cli +from renku.core.compat import Path from renku.core.utils.shacl import validate_graph From 901c67c1f403dc7c472aeea6983e1f3eecfb03ab Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Tue, 12 Nov 2019 11:57:43 +0100 Subject: [PATCH 15/24] Fixes python 3.5 tests --- tests/core/models/test_shacl_schema.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/core/models/test_shacl_schema.py b/tests/core/models/test_shacl_schema.py index 49d36ad02d..fb20bea5bd 100644 --- a/tests/core/models/test_shacl_schema.py +++ b/tests/core/models/test_shacl_schema.py @@ -69,13 +69,13 @@ def test_dataset_shacl(tmpdir, runner, project, client): } ) - r, _, t = validate_graph(rdf, shacl_path=force_dataset_path) + r, _, t = validate_graph(rdf, shacl_path=str(force_dataset_path)) assert r is True, t - r, _, t = validate_graph(rdf, shacl_path=force_datasetfile_path) + r, _, t = validate_graph(rdf, shacl_path=str(force_datasetfile_path)) assert r is True, t - r, _, t = validate_graph(rdf, shacl_path=force_datasettag_path) + r, _, t = validate_graph(rdf, 
shacl_path=str(force_datasettag_path)) assert r is True, t r, _, t = validate_graph(rdf) @@ -101,7 +101,7 @@ def test_project_shacl(project, client): 'produceGeneralizedRdf': False } ) - r, _, t = validate_graph(rdf, shacl_path=path) + r, _, t = validate_graph(rdf, shacl_path=str(path)) assert r is True, t r, _, t = validate_graph(rdf) From 835cff13826505f9b18b052936e4f15c45d14d06 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 14 Nov 2019 08:07:06 +0100 Subject: [PATCH 16/24] Patches pyld to prevent contexts from overwhelming its cache --- renku/core/commands/checks/validate_shacl.py | 4 +-- renku/core/commands/format/graph.py | 4 +-- renku/core/compat.py | 38 +++++++++++++++++++- renku/core/models/jsonld.py | 10 +++--- tests/cli/test_log.py | 1 + tests/core/models/test_shacl_schema.py | 8 ++--- 6 files changed, 50 insertions(+), 15 deletions(-) diff --git a/renku/core/commands/checks/validate_shacl.py b/renku/core/commands/checks/validate_shacl.py index 73385014a1..84f0a9045d 100644 --- a/renku/core/commands/checks/validate_shacl.py +++ b/renku/core/commands/checks/validate_shacl.py @@ -17,11 +17,11 @@ # limitations under the License. 
"""Check KG structure using SHACL.""" import yaml -from pyld import jsonld as ld from rdflib.namespace import Namespace from rdflib.term import BNode from renku.core.commands.echo import WARNING +from renku.core.compat import pyld from renku.core.models.jsonld import NoDatesSafeLoader from renku.core.utils.shacl import validate_graph @@ -100,7 +100,7 @@ def check_shacl_structure(path): with path.open(mode='r') as fp: source = yaml.load(fp, Loader=NoDatesSafeLoader) or {} - rdf = ld.to_rdf( + rdf = pyld.jsonld.to_rdf( source, options={ 'format': 'application/n-quads', diff --git a/renku/core/commands/format/graph.py b/renku/core/commands/format/graph.py index 5940a671a5..b983ffe1e8 100644 --- a/renku/core/commands/format/graph.py +++ b/renku/core/commands/format/graph.py @@ -39,10 +39,10 @@ def _jsonld(graph, format, *args, **kwargs): """Return formatted graph in JSON-LD ``format`` function.""" import json - from pyld import jsonld + from renku.core.compat import pyld from renku.core.models.jsonld import asjsonld - output = getattr(jsonld, format)([ + output = getattr(pyld.jsonld, format)([ asjsonld(action) for action in graph.activities.values() ]) return json.dumps(output, indent=2) diff --git a/renku/core/compat.py b/renku/core/compat.py index cfbbda9161..4de97ac978 100644 --- a/renku/core/compat.py +++ b/renku/core/compat.py @@ -18,10 +18,14 @@ """Compatibility layer for different Python versions.""" import contextlib +import json import os import sys +from collections import deque from pathlib import Path +import pyld + if sys.version_info < (3, 6): original_resolve = Path.resolve @@ -63,4 +67,36 @@ def __exit__(self, *excinfo): except NameError: # pragma: no cover FileNotFoundError = IOError -__all__ = ('FileNotFoundError', 'Path', 'contextlib') + +class ActiveContextCache(object): + """Pyld context cache without issue of missing contexts.""" + + def __init__(self, size=100): + self.order = deque() + self.cache = {} + self.size = size + + def get(self, 
active_ctx, local_ctx): + key1 = json.dumps(active_ctx) + key2 = json.dumps(local_ctx) + return self.cache.get(key1, {}).get(key2) + + def set(self, active_ctx, local_ctx, result): + if len(self.order) == self.size: + entry = self.order.popleft() + if sum( + e['activeCtx'] == entry['activeCtx'] and + e['localCtx'] == entry['localCtx'] for e in self.order + ) == 0: + # only delete from cache if it doesn't exist in context deque + print("this totally works") + del self.cache[entry['activeCtx']][entry['localCtx']] + key1 = json.dumps(active_ctx) + key2 = json.dumps(local_ctx) + self.order.append({'activeCtx': key1, 'localCtx': key2}) + self.cache.setdefault(key1, {})[key2] = json.loads(json.dumps(result)) + + +pyld._cache = {'activeCtx': ActiveContextCache()} + +__all__ = ('FileNotFoundError', 'Path', 'contextlib', 'pyld') diff --git a/renku/core/models/jsonld.py b/renku/core/models/jsonld.py index 8f2ac38413..5e2a9adbd8 100644 --- a/renku/core/models/jsonld.py +++ b/renku/core/models/jsonld.py @@ -30,8 +30,8 @@ from attr._compat import iteritems from attr._funcs import has from attr._make import Factory, fields -from pyld import jsonld as ld +from renku.core.compat import pyld from renku.core.models.locals import ReferenceMixin, with_reference from renku.core.models.migrations import JSONLD_MIGRATIONS @@ -149,7 +149,7 @@ def wrap(cls): # Register class for given JSON-LD @type try: - type_ = ld.expand({ + type_ = pyld.jsonld.expand({ '@type': jsonld_cls._jsonld_type, '@context': context })[0]['@type'] @@ -473,10 +473,10 @@ def from_jsonld( if cls._jsonld_translate: # perform the translation - data = ld.compact(data, cls._jsonld_translate) + data = pyld.jsonld.compact(data, cls._jsonld_translate) # compact using the class json-ld context data.pop('@context', None) - data = ld.compact(data, cls._jsonld_context) + data = pyld.jsonld.compact(data, cls._jsonld_context) data.setdefault('@context', cls._jsonld_context) @@ -504,7 +504,7 @@ def from_jsonld( data['@context'] = 
{'@base': data['@context']} data['@context'].update(cls._jsonld_context) try: - compacted = ld.compact(data, cls._jsonld_context) + compacted = pyld.jsonld.compact(data, cls._jsonld_context) except Exception: compacted = data else: diff --git a/tests/cli/test_log.py b/tests/cli/test_log.py index 140cfb9ead..b0a13697e8 100644 --- a/tests/cli/test_log.py +++ b/tests/cli/test_log.py @@ -62,5 +62,6 @@ def test_dataset_log_strict(tmpdir, runner, project, client, format): result = runner.invoke( cli, ['log', '--strict', '--format={}'.format(format)] ) + assert 0 == result.exit_code, result.output assert all(p in result.output for p in paths) diff --git a/tests/core/models/test_shacl_schema.py b/tests/core/models/test_shacl_schema.py index fb20bea5bd..0bb0467dfe 100644 --- a/tests/core/models/test_shacl_schema.py +++ b/tests/core/models/test_shacl_schema.py @@ -17,10 +17,8 @@ # limitations under the License. """test KG against SHACL shape.""" -from pyld import jsonld as ld - from renku.cli import cli -from renku.core.compat import Path +from renku.core.compat import Path, pyld from renku.core.utils.shacl import validate_graph @@ -61,7 +59,7 @@ def test_dataset_shacl(tmpdir, runner, project, client): with client.with_dataset('dataset') as dataset: g = dataset.asjsonld() - rdf = ld.to_rdf( + rdf = pyld.jsonld.to_rdf( g, options={ 'format': 'application/n-quads', @@ -94,7 +92,7 @@ def test_project_shacl(project, client): project.creator = Creator(email='johndoe@example.com', name='Johnny Doe') g = project.asjsonld() - rdf = ld.to_rdf( + rdf = pyld.jsonld.to_rdf( g, options={ 'format': 'application/n-quads', From 04b07eb5d62f3ec4d0e81f274c29cd7893a33b5c Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 14 Nov 2019 08:56:47 +0100 Subject: [PATCH 17/24] Cleanup of pyld patch --- renku/core/compat.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/renku/core/compat.py b/renku/core/compat.py index 4de97ac978..8bc6ed24d4 100644 --- 
a/renku/core/compat.py +++ b/renku/core/compat.py @@ -21,7 +21,6 @@ import json import os import sys -from collections import deque from pathlib import Path import pyld @@ -68,19 +67,9 @@ def __exit__(self, *excinfo): FileNotFoundError = IOError -class ActiveContextCache(object): +class PatchedActiveContextCache(pyld.jsonld.ActiveContextCache): """Pyld context cache without issue of missing contexts.""" - def __init__(self, size=100): - self.order = deque() - self.cache = {} - self.size = size - - def get(self, active_ctx, local_ctx): - key1 = json.dumps(active_ctx) - key2 = json.dumps(local_ctx) - return self.cache.get(key1, {}).get(key2) - def set(self, active_ctx, local_ctx, result): if len(self.order) == self.size: entry = self.order.popleft() @@ -89,7 +78,6 @@ def set(self, active_ctx, local_ctx, result): e['localCtx'] == entry['localCtx'] for e in self.order ) == 0: # only delete from cache if it doesn't exist in context deque - print("this totally works") del self.cache[entry['activeCtx']][entry['localCtx']] key1 = json.dumps(active_ctx) key2 = json.dumps(local_ctx) @@ -97,6 +85,6 @@ def set(self, active_ctx, local_ctx, result): self.cache.setdefault(key1, {})[key2] = json.loads(json.dumps(result)) -pyld._cache = {'activeCtx': ActiveContextCache()} +pyld.jsonld._cache = {'activeCtx': PatchedActiveContextCache()} __all__ = ('FileNotFoundError', 'Path', 'contextlib', 'pyld') From ed81d9fa2cdda44c9adc8885e9d0e89a931fd88f Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 14 Nov 2019 09:43:04 +0100 Subject: [PATCH 18/24] Updates documentation --- renku/cli/log.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/renku/cli/log.py b/renku/cli/log.py index dc7edfb168..1873314fe2 100644 --- a/renku/cli/log.py +++ b/renku/cli/log.py @@ -52,6 +52,15 @@ * `ascii` * `dot` +* `dot-full` +* `dot-landscape` +* `dot-full-landscape` +* `dot-debug` +* `json-ld` +* `json-ld-graph` +* `Makefile` +* `nt` +* `rdf` You can generate a PNG of the full 
history of all files in the repository using the :program:`dot` program. @@ -62,6 +71,15 @@ $ renku log --format dot $FILES | dot -Tpng > /tmp/graph.png $ open /tmp/graph.png +Output validation +~~~~~~~~~~~~~~~~~ + +The ``--strict`` option forces the output to be validated against the Renku +SHACL schema, causing the command to fail if the generated output is not +valid, as well as printing detailed information on all the issues found. +The ``--strict`` option is only supported for the ``json-ld``, ``rdf`` and +``nt`` output formats. + """ import click From b299b49c5caca506ac677ce2d42fdb902068db75 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 14 Nov 2019 12:45:19 +0100 Subject: [PATCH 19/24] Cleanup: remove hardcoded versions from shacl shape --- renku/data/shacl_shape.json | 1 - 1 file changed, 1 deletion(-) diff --git a/renku/data/shacl_shape.json b/renku/data/shacl_shape.json index e0939f22bc..eba1906a49 100644 --- a/renku/data/shacl_shape.json +++ b/renku/data/shacl_shape.json @@ -44,7 +44,6 @@ "@type": "@id" } }, - "version": "0.7.1.dev7+dirty", "@graph": [ { "@id": "schema:", From 328f5b5c49174f411a4dee1d83349629f3a4eeea Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Fri, 15 Nov 2019 12:50:00 +0100 Subject: [PATCH 20/24] Cleanup and addresses PR comments --- renku/core/commands/checks/validate_shacl.py | 21 +++++++++++--------- renku/core/commands/format/graph.py | 15 +++++++------- renku/core/errors.py | 4 ++++ renku/core/utils/shacl.py | 4 ++-- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/renku/core/commands/checks/validate_shacl.py b/renku/core/commands/checks/validate_shacl.py index 84f0a9045d..ae7c212a17 100644 --- a/renku/core/commands/checks/validate_shacl.py +++ b/renku/core/commands/checks/validate_shacl.py @@ -37,7 +37,7 @@ def _shacl_graph_to_string(graph): res = graph.value(result, sh.resultMessage) if res: - message = '{}: {}'.format(path, res) + message = '{0}: {1}'.format(path, res) else: kind =
graph.value(result, sh.sourceConstraintComponent) focusNode = graph.value(result, sh.focusNode) @@ -45,7 +45,9 @@ def _shacl_graph_to_string(graph): if isinstance(focusNode, BNode): focusNode = '' - message = '{}: Type: {}, Node ID: {}'.format(path, kind, focusNode) + message = '{0}: Type: {1}, Node ID: {2}'.format( + path, kind, focusNode + ) problems.append(message) @@ -61,9 +63,8 @@ def check_project_structure(client): if conform: return True, None - problems = ( - WARNING + 'Invalid structure of project metadata\n\t' + - _shacl_graph_to_string(graph) + problems = '{0}Invalid structure of project metadata\n\t{1}'.format( + WARNING, _shacl_graph_to_string(graph) ) return False, problems @@ -73,13 +74,13 @@ def check_datasets_structure(client): """Validate dataset metadata against SHACL.""" ok = True - problems = WARNING + 'Invalid structure of dataset metadata\n' + problems = ['{0}Invalid structure of dataset metadata'.format(WARNING)] for path in client.renku_datasets_path.rglob(client.METADATA): try: conform, graph, t = check_shacl_structure(path) except (Exception, BaseException) as e: - problems += 'Couldn\'t validate {}: {}\n\n'.format(path, e) + problems.append('Couldn\'t validate {0}: {1}\n\n'.format(path, e)) continue if conform: @@ -87,12 +88,14 @@ def check_datasets_structure(client): ok = False - problems += str(path) + '\n\t' + _shacl_graph_to_string(graph) + '\n\n' + problems.append( + '{0}\n\t{1}\n'.format(path, _shacl_graph_to_string(graph)) + ) if ok: return True, None - return False, problems + return False, '\n'.join(problems) def check_shacl_structure(path): diff --git a/renku/core/commands/format/graph.py b/renku/core/commands/format/graph.py index b983ffe1e8..ef4c19f1d5 100644 --- a/renku/core/commands/format/graph.py +++ b/renku/core/commands/format/graph.py @@ -21,6 +21,7 @@ import click +from renku.core.errors import SHACLValidationError from renku.core.utils.shacl import validate_graph @@ -30,7 +31,7 @@ def ascii(graph, strict=False): 
from ..echo import echo_via_pager if strict: - raise click.BadParameter('--strict not supported for json-ld-graph') + raise SHACLValidationError('--strict not supported for json-ld-graph') echo_via_pager(str(DAG(graph))) @@ -68,7 +69,7 @@ def dot(graph, simple=True, debug=False, landscape=False, strict=False): from rdflib.tools.rdf2dot import rdf2dot if strict: - raise click.BadParameter('--strict not supported for json-ld-graph') + raise SHACLValidationError('--strict not supported for json-ld-graph') g = _conjunctive_graph(graph) @@ -312,7 +313,7 @@ def makefile(graph, strict=False): from renku.core.models.provenance.activities import ProcessRun, WorkflowRun if strict: - raise click.BadParameter('--strict not supported for json-ld-graph') + raise SHACLValidationError('--strict not supported for json-ld-graph') for activity in graph.activities.values(): if not isinstance(activity, ProcessRun): @@ -341,7 +342,7 @@ def jsonld(graph, strict=False): r, _, t = validate_graph(ld, format='json-ld') if not r: - raise click.BadParameter( + raise SHACLValidationError( "{}\nCouldn't get log: Invalid Knowledge Graph data".format(t) ) click.echo(ld) @@ -350,7 +351,7 @@ def jsonld(graph, strict=False): def jsonld_graph(graph, strict=False): """Format graph as JSON-LD graph file.""" if strict: - raise click.BadParameter('--strict not supported for json-ld-graph') + raise SHACLValidationError('--strict not supported for json-ld-graph') click.echo(_jsonld(graph, 'flatten')) @@ -361,7 +362,7 @@ def nt(graph, strict=False): r, _, t = validate_graph(nt, format='nt') if not r: - raise click.BadParameter( + raise SHACLValidationError( "{}\nCouldn't get log: Invalid Knowledge Graph data".format(t) ) @@ -375,7 +376,7 @@ def rdf(graph, strict=False): r, _, t = validate_graph(xml, format='xml') if not r: - raise click.BadParameter( + raise SHACLValidationError( "{}\nCouldn't get log: Invalid Knowledge Graph data".format(t) ) diff --git a/renku/core/errors.py b/renku/core/errors.py index 
473ab9701c..92cfe46b36 100644 --- a/renku/core/errors.py +++ b/renku/core/errors.py @@ -373,3 +373,7 @@ class UrlSchemeNotSupported(RenkuException): class OperationError(RenkuException): """Raised when an operation at runtime raises an error.""" + + +class SHACLValidationError(RenkuException): + """Raised when SHACL validation of the graph fails.""" diff --git a/renku/core/utils/shacl.py b/renku/core/utils/shacl.py index 4d406bf281..71e2a15eba 100644 --- a/renku/core/utils/shacl.py +++ b/renku/core/utils/shacl.py @@ -15,7 +15,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""JSON-LD SHACL validation.""" +"""JSON-LD SHACL validations.""" from pkg_resources import resource_string from pyshacl import validate @@ -24,7 +24,7 @@ def validate_graph(graph, shacl_path=None, format='nquads'): """Validate the current graph with a SHACL schema. - uses default schema if not supplied. + Uses default schema if not supplied.
""" if shacl_path: with open(shacl_path, 'r', encoding='utf-8') as f: From c0468b45853ba91980569b2be5531ebe1e52c774 Mon Sep 17 00:00:00 2001 From: Rok Roskar Date: Fri, 15 Nov 2019 22:55:38 +0100 Subject: [PATCH 21/24] fix: sanitize author name for nt output --- renku/core/models/provenance/agents.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/renku/core/models/provenance/agents.py b/renku/core/models/provenance/agents.py index 67f429aef9..7e731b1c5a 100644 --- a/renku/core/models/provenance/agents.py +++ b/renku/core/models/provenance/agents.py @@ -58,9 +58,14 @@ class Person: @_id.default def default_id(self): """Set the default id.""" + import string if self.email: return 'mailto:{email}'.format(email=self.email) - return '_:{}'.format(''.join(self.name.lower().split())) + + # prep name to be a valid ntuple string + name = self.name.translate(str.maketrans('', '', string.punctuation)) + name = ''.join(filter(lambda x: x in string.printable, name)) + return '_:{}'.format(''.join(name.lower().split())) @email.validator def check_email(self, attribute, value): From cd13981481156027a8abb03065a527a8b98036b7 Mon Sep 17 00:00:00 2001 From: Rok Roskar Date: Fri, 15 Nov 2019 23:12:28 +0100 Subject: [PATCH 22/24] refactor: Creator --> Person --- tests/core/models/test_shacl_schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/core/models/test_shacl_schema.py b/tests/core/models/test_shacl_schema.py index 0bb0467dfe..fd6d6f1e01 100644 --- a/tests/core/models/test_shacl_schema.py +++ b/tests/core/models/test_shacl_schema.py @@ -82,14 +82,14 @@ def test_dataset_shacl(tmpdir, runner, project, client): def test_project_shacl(project, client): """Test project metadata structure.""" - from renku.core.models.creators import Creator + from renku.core.models.provenance.agents import Person path = Path( __file__ ).parent.parent.parent / 'fixtures' / 'force_project_shacl.json' project = client.project - project.creator = 
Creator(email='johndoe@example.com', name='Johnny Doe') + project.creator = Person(email='johndoe@example.com', name='Johnny Doe') g = project.asjsonld() rdf = pyld.jsonld.to_rdf( From 3a4a74d387ecd4eec9f28b8e14ce87ef6aa8b283 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Thu, 28 Nov 2019 10:57:34 +0100 Subject: [PATCH 23/24] Adapts SHACL and tests to changes on master --- renku/data/shacl_shape.json | 8 ++++++++ tests/cli/test_log.py | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/renku/data/shacl_shape.json b/renku/data/shacl_shape.json index eba1906a49..31d53f2277 100644 --- a/renku/data/shacl_shape.json +++ b/renku/data/shacl_shape.json @@ -560,6 +560,14 @@ "closed": true, "targetClass": "prov:Activity", "property": [ + { + "path": "schema:isPartOf", + "sh:class": { + "@id": "schema:Project" + }, + "minCount": 1, + "maxCount": 1 + }, { "nodeKind": "sh:Literal", "path": "rdfs:comment", diff --git a/tests/cli/test_log.py b/tests/cli/test_log.py index b0a13697e8..a561c4d353 100644 --- a/tests/cli/test_log.py +++ b/tests/cli/test_log.py @@ -47,10 +47,12 @@ def test_dataset_log_strict(tmpdir, runner, project, client, format): assert 0 == result.exit_code paths = [] + test_paths = [] for i in range(3): new_file = tmpdir.join('file_{0}'.format(i)) new_file.write(str(i)) paths.append(str(new_file)) + test_paths.append(str(new_file.relto(tmpdir.join('..')))) # add data result = runner.invoke( @@ -64,4 +66,4 @@ def test_dataset_log_strict(tmpdir, runner, project, client, format): ) assert 0 == result.exit_code, result.output - assert all(p in result.output for p in paths) + assert all(p in result.output for p in test_paths) From 877113e7e9157302a4b242c614c5bb571d19c5a3 Mon Sep 17 00:00:00 2001 From: Ralf Grubenmann Date: Fri, 29 Nov 2019 11:36:34 +0100 Subject: [PATCH 24/24] Changed to proper objects on loading and fix DatasetFile ids for old datasets --- renku/core/commands/dataset.py | 2 +- renku/core/models/datasets.py | 16 ++++++++++- 
tests/cli/test_integration_datasets.py | 39 ++++++++++++-------------- 3 files changed, 34 insertions(+), 23 deletions(-) diff --git a/renku/core/commands/dataset.py b/renku/core/commands/dataset.py index 288a8983bf..6604b2838a 100644 --- a/renku/core/commands/dataset.py +++ b/renku/core/commands/dataset.py @@ -567,7 +567,7 @@ def update_datasets( file_.dataset = dataset possible_updates.append(file_) - unique_remotes.add(file_.based_on['url']) + unique_remotes.add(file_.based_on.url) if ref and len(unique_remotes) > 1: raise ParameterError( diff --git a/renku/core/models/datasets.py b/renku/core/models/datasets.py index 7a17d1821c..22295b5e6c 100644 --- a/renku/core/models/datasets.py +++ b/renku/core/models/datasets.py @@ -150,6 +150,12 @@ def convert_filename_path(p): return Path(p).name +def convert_based_on(v): + """Convert based_on to DatasetFile.""" + if v: + return DatasetFile.from_jsonld(v) + + @jsonld.s( type='schema:DigitalDocument', slots=True, @@ -179,7 +185,10 @@ class DatasetFile(Entity, CreatorMixin): url = jsonld.ib(default=None, context='schema:url', kw_only=True) based_on = jsonld.ib( - default=None, context='schema:isBasedOn', kw_only=True + default=None, + context='schema:isBasedOn', + kw_only=True, + converter=convert_based_on ) @added.default @@ -213,6 +222,11 @@ def __attrs_post_init__(self): if not self.name: self.name = self.filename + parsed_id = urllib.parse.urlparse(self._id) + + if not parsed_id.scheme: + self._id = 'file://{}'.format(self._id) + def _convert_dataset_files(value): """Convert dataset files.""" diff --git a/tests/cli/test_integration_datasets.py b/tests/cli/test_integration_datasets.py index d9b83e8eaa..5fe9e2ff05 100644 --- a/tests/cli/test_integration_datasets.py +++ b/tests/cli/test_integration_datasets.py @@ -22,7 +22,6 @@ import git import pytest -import yaml from renku.cli import cli @@ -596,13 +595,11 @@ def test_usage_error_in_add_from_git(runner, client, params, n_urls, message): def 
read_dataset_file_metadata(client, dataset_name, filename): """Return metadata from dataset's YAML file.""" - path = client.dataset_path(dataset_name) - assert path.exists() + with client.with_dataset(dataset_name) as dataset: + assert client.dataset_path(dataset.name).exists() - with path.open(mode='r') as fp: - metadata = yaml.safe_load(fp) - for file_ in metadata['files']: - if file_['path'].endswith(filename): + for file_ in dataset.files: + if file_.path.endswith(filename): return file_ @@ -631,14 +628,14 @@ def test_dataset_update(client, runner, params): assert 0 == result.exit_code after = read_dataset_file_metadata(client, 'remote', 'CHANGES.rst') - assert after['_id'] == before['_id'] - assert after['_label'] != before['_label'] - assert after['added'] == before['added'] - assert after['url'] == before['url'] - assert after['based_on']['_id'] == before['based_on']['_id'] - assert after['based_on']['_label'] != before['based_on']['_label'] - assert after['based_on']['path'] == before['based_on']['path'] - assert after['based_on']['based_on'] is None + assert after._id == before._id + assert after._label != before._label + assert after.added == before.added + assert after.url == before.url + assert after.based_on._id == before.based_on._id + assert after.based_on._label != before.based_on._label + assert after.based_on.path == before.based_on.path + assert after.based_on.based_on is None @pytest.mark.integration @@ -792,12 +789,12 @@ def test_import_from_renku_project(tmpdir, client, runner): assert 0 == result.exit_code metadata = read_dataset_file_metadata(client, 'remote-dataset', 'file') - assert metadata['creator'][0]['name'] == remote['creator'][0]['name'] - assert metadata['based_on']['_id'] == remote['_id'] - assert metadata['based_on']['_label'] == remote['_label'] - assert metadata['based_on']['path'] == remote['path'] - assert metadata['based_on']['based_on'] is None - assert metadata['based_on']['url'] == REMOTE + assert metadata.creator[0].name 
== remote.creator[0].name + assert metadata.based_on._id == remote._id + assert metadata.based_on._label == remote._label + assert metadata.based_on.path == remote.path + assert metadata.based_on.based_on is None + assert metadata.based_on.url == REMOTE @pytest.mark.integration