Skip to content

Commit 5a115b2

Browse files
Lilykos authored and pamfilos committed
schemas: cli tool to validate records based on schema
Signed-off-by: Ilias Koutsakis <ilias.koutsakis@cern.ch>
1 parent 3dc4e53 commit 5a115b2

File tree

7 files changed

+360
-32
lines changed

7 files changed

+360
-32
lines changed

cap/cli.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,34 @@
2525
"""CAP base Invenio configuration."""
2626

2727
from __future__ import absolute_import, print_function
28-
28+
from click import Option, UsageError
2929
from invenio_base.app import create_cli
3030

3131
from cap.factory import create_api
3232

33+
34+
class MutuallyExclusiveOption(Option):
    """Click option that cannot be combined with certain other options.

    Use it through ``click.option(..., cls=MutuallyExclusiveOption,
    mutually_exclusive=[...])`` where ``mutually_exclusive`` lists the
    parameter names (e.g. ``'schema_url'``) that must not be given together
    with this option. The option's help text is extended with a note naming
    them, and passing both on the command line raises a ``UsageError``.
    """

    def __init__(self, *args, **kwargs):
        """Strip ``mutually_exclusive`` from kwargs and amend the help text.

        :param mutually_exclusive: iterable of parameter names that conflict
            with this option (optional keyword, defaults to none).
        """
        self.mutually_exclusive = set(kwargs.pop('mutually_exclusive', []))
        # Sorted, because set iteration order for strings is not stable
        # between interpreter runs; always defined, even when empty.
        self.exclusives = ', '.join(sorted(self.mutually_exclusive))

        if self.mutually_exclusive:
            base_help = kwargs.get('help') or ''
            kwargs['help'] = f'{base_help} NOTE: This argument is mutually ' \
                             f'exclusive with arguments: [{self.exclusives}].'

        super(MutuallyExclusiveOption, self).__init__(*args, **kwargs)

    def handle_parse_result(self, ctx, opts, args):
        """Reject the command line if a conflicting option is also present.

        :raises UsageError: when this option and at least one of the
            options named in ``mutually_exclusive`` both appear in ``opts``.
        """
        if self.name in opts and self.mutually_exclusive.intersection(opts):
            raise UsageError(f'Illegal usage: `{self.name}` is mutually '
                             f'exclusive with arguments [{self.exclusives}].')

        return super(MutuallyExclusiveOption, self).\
            handle_parse_result(ctx, opts, args)
56+
57+
3358
# Command-line entry point, built by ``create_cli`` around the
# ``create_api`` application factory.
cli = create_cli(create_app=create_api)

cap/modules/mail/cli.py

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -24,40 +24,16 @@
2424
"""Mail Cli."""
2525

2626
import click
27-
from click import Option, UsageError
2827
from flask import current_app
2928
from flask_cli import with_appcontext
3029

3130
from flask_mail import Message
3231

32+
from cap.cli import MutuallyExclusiveOption
3333
from cap.modules.mail.users import get_all_users, get_users_by_record,\
3434
get_users_by_experiment
3535

3636

37-
class MutuallyExclusiveOption(Option):
38-
def __init__(self, *args, **kwargs):
39-
self.mutually_exclusive = set(kwargs.pop('mutually_exclusive', []))
40-
help = kwargs.get('help', '')
41-
42-
if self.mutually_exclusive:
43-
ex_str = ', '.join(self.mutually_exclusive)
44-
kwargs['help'] = help + (
45-
' NOTE: This argument is mutually exclusive with'
46-
' arguments: [' + ex_str + '].'
47-
)
48-
super(MutuallyExclusiveOption, self).__init__(*args, **kwargs)
49-
50-
def handle_parse_result(self, ctx, opts, args):
51-
if self.mutually_exclusive.intersection(opts) and self.name in opts:
52-
raise UsageError(
53-
f"Illegal usage: `{self.name}` is mutually exclusive with "
54-
f"arguments `{', '.join(self.mutually_exclusive)}`."
55-
)
56-
57-
return super(MutuallyExclusiveOption, self).\
58-
handle_parse_result(ctx, opts, args)
59-
60-
6137
@click.group()
6238
def mail():
6339
"""Mail commands."""

cap/modules/schemas/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@
2525

2626
from __future__ import absolute_import, print_function
2727

28-
from .cli import schemas
28+
from .cli import schemas, validate

cap/modules/schemas/cli.py

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
# In applying this license, CERN does not
2222
# waive the privileges and immunities granted to it by virtue of its status
2323
# as an Intergovernmental Organization or submit itself to any jurisdiction.
24-
"""CAP Cli."""
24+
"""CAP Schema cli."""
2525

2626
import json
2727
import os
@@ -30,11 +30,113 @@
3030
from flask_cli import with_appcontext
3131
from invenio_db import db
3232
from invenio_jsonschemas.errors import JSONSchemaNotFound
33+
from invenio_search import current_search_client
3334
from sqlalchemy.exc import IntegrityError
3435

36+
from cap.cli import MutuallyExclusiveOption
37+
from cap.modules.deposit.errors import DepositValidationError
3538
from cap.modules.fixtures.cli import fixtures
39+
from cap.modules.records.api import CAPRecord
40+
from cap.modules.schemas.models import Schema
41+
from cap.modules.schemas.resolvers import resolve_schema_by_url,\
42+
resolve_schema_by_name_and_version, schema_name_to_url
3643

37-
from .models import Schema
44+
45+
@fixtures.command()
@click.option('--schema-url', '-u',
              cls=MutuallyExclusiveOption,
              mutually_exclusive=["ana_type", "ana_version"],
              help='The url of the schema used for validation.')
@click.option('--ana-type', '-a',
              cls=MutuallyExclusiveOption,
              mutually_exclusive=["schema_url"],
              help='The analysis type of the schema used for validation.')
@click.option('--ana-version', '-v',
              help='The analysis version of the records.')
@click.option('--compare-with', '-c',
              help='The schema version, that the '
                   'records should be compared to.')
@click.option('--status', '-s',
              default='draft',
              type=click.Choice(['draft', 'published'], case_sensitive=False),
              help='The metadata type that will be used for validation.')
@click.option('--export', '-e',
              type=click.Path(),
              help='A file where, the validation errors can be saved.')
@with_appcontext
def validate(schema_url, ana_type, ana_version, compare_with, status, export):
    """
    Validate deposit or record metadata based on their schema. Provide the
    schema url OR ana-type and version, as well as the schema version that you
    want to compare the records you get, to.

    If you do not provide an ana-version, it will get the latest. If you do
    not provide a -c parameter, the records will compare the data to their
    own schema.

    \b
    E.g.
        cap fixtures validate -u https://analysispreservation.cern.ch/schemas/deposits/records/test-v2.0.0.json -c 1.0.0 # noqa
        cap fixtures validate -a test -c 1.0.0
    """
    # NOTE: the docstring above doubles as the command's ``--help`` text
    # (the ``\b`` marker keeps click from rewrapping the examples), so the
    # per-parameter documentation lives in the ``click.option`` help strings.
    if not (schema_url or ana_type):
        raise click.UsageError(
            'You need to provide the ana-type or the schema-url.')

    try:
        if schema_url:
            schema = resolve_schema_by_url(schema_url)
        else:
            schema = resolve_schema_by_name_and_version(ana_type, ana_version)

        # Resolve the comparison schema once, up front: it is the same for
        # every record, and a bad ``--compare-with`` value is then reported
        # as a usage error instead of failing on the first record.
        compare_url = schema_name_to_url(schema.name, compare_with) \
            if compare_with else None
    except JSONSchemaNotFound:
        raise click.UsageError(
            'Schema not found.')
    except ValueError:
        raise click.UsageError(
            'Version has to be passed as string <major>.<minor>.<patch>.')

    # Imported here rather than at module level, as in the original
    # (presumably to avoid a circular import -- TODO confirm).
    from cap.modules.deposit.api import CAPDeposit

    # The option is declared case-insensitive; normalise so the comparison
    # and the search query below see the canonical lower-case value even if
    # the installed click hands back the user's casing.
    status = status.lower()

    # differentiate between drafts/published
    if status == 'draft':
        search_path = 'deposits-records'
        cap_record_class = CAPDeposit
    else:
        search_path = 'records'
        cap_record_class = CAPRecord

    # get all the records for this specific schema/type combination
    # NOTE(review): at most 5000 hits are requested, so larger result sets
    # are silently truncated.
    records = current_search_client.search(
        search_path,
        q=f'_deposit.status: {status} AND '
          f'$schema: "{schema_name_to_url(schema.name, schema.version)}"',
        size=5000
    )['hits']['hits']
    pids = [rec['_id'] for rec in records]

    click.secho(
        f'{len(records)} record(s) of {schema.name} found.\n', fg='green')

    total_errors = []
    for pid in pids:
        cap_record = cap_record_class.get_record(pid)

        # point the record at the schema version used for validation
        if compare_url:
            cap_record['$schema'] = compare_url

        try:
            cap_record.validate()
            click.secho(f'No errors found in record {pid}', fg='green')
        except DepositValidationError as exc:
            error_list = '\n'.join(str(err.res) for err in exc.errors)
            msg = f'Errors in {pid}:\n{error_list}'
            click.secho(msg, fg='red')
            total_errors.append(msg)

    # export the errors in a file
    if export:
        # explicit encoding: error messages may contain non-ASCII text
        with open(export, 'w', encoding='utf-8') as out:
            out.write('\n\n'.join(total_errors))
        click.secho(f'Errors saved at {export}.', fg='red')
38140

39141

40142
@fixtures.command()

cap/modules/schemas/resolvers.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,14 +98,19 @@ def parse_path(string):
9898
return match.groups() if match else None
9999

100100

101-
def schema_name_to_url(schema_name):
101+
def schema_name_to_url(schema_name, version=None):
102102
"""Return url eg. https://host.com/schemas/schema-v0.0.1.json."""
103-
schema = Schema.get_latest(schema_name)
103+
schema = resolve_schema_by_name_and_version(schema_name, version)
104104
url = current_jsonschemas.path_to_url(schema.deposit_path)
105-
106105
return url
107106

108107

108+
def resolve_schema_by_name_and_version(schema_name, version=None):
    """Return the Schema for ``schema_name``.

    A truthy ``version`` is looked up with ``Schema.get``; otherwise the
    result of ``Schema.get_latest`` for that name is returned.
    """
    if version:
        return Schema.get(schema_name, version)
    return Schema.get_latest(schema_name)
112+
113+
109114
@lru_cache(maxsize=1024)
110115
def resolve_schema_by_url(url):
111116
"""Get Schema object for given url."""

tests/conftest.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,10 @@
3131

3232
import pytest
3333
from flask import current_app
34+
from flask.cli import ScriptInfo
3435
from flask_principal import ActionNeed
3536
from flask_security import login_user
37+
3638
from invenio_access.models import ActionRoles, ActionUsers
3739
from invenio_accounts.testutils import create_test_user
3840
from invenio_app.config import APP_DEFAULT_SECURE_HEADERS
@@ -47,9 +49,12 @@
4749
from invenio_oauthclient.models import RemoteAccount
4850
from invenio_pidstore.resolver import Resolver
4951
from invenio_search import current_search, current_search_client
52+
53+
from click.testing import CliRunner
5054
from sqlalchemy_utils.functions import create_database, database_exists
5155
from werkzeug.local import LocalProxy
5256

57+
from cap.cli import cli
5358
from cap.factory import create_api
5459
from cap.modules.auth.models import OAuth2Token
5560
from cap.modules.deposit.api import CAPDeposit as Deposit
@@ -69,6 +74,19 @@
6974
# Proxy that looks up ``current_app.extensions['security'].datastore``
# through the wrapped lambda instead of binding it at import time.
_datastore = LocalProxy(lambda: current_app.extensions['security'].datastore)
7075

7176

77+
@pytest.fixture()
def cli_runner(app):
    """Provide a callable that runs a cli command line against ``app``.

    The callable takes the whole command as one whitespace-separated string
    and returns whatever ``CliRunner.invoke`` returns for it.
    """
    def _app_factory(info):
        # ScriptInfo expects a factory; always hand back the fixture app.
        return app

    info_obj = ScriptInfo(create_app=_app_factory)
    click_runner = CliRunner()

    def run(command):
        """Run the command from the CLI."""
        return click_runner.invoke(cli, command.split(), obj=info_obj)

    yield run
88+
89+
7290
@pytest.fixture(scope='session')
7391
def instance_path():
7492
"""Default instance path."""

0 commit comments

Comments
 (0)