|
21 | 21 | # In applying this license, CERN does not |
22 | 22 | # waive the privileges and immunities granted to it by virtue of its status |
23 | 23 | # as an Intergovernmental Organization or submit itself to any jurisdiction. |
24 | | -"""CAP Cli.""" |
| 24 | +"""CAP Schema cli.""" |
25 | 25 |
|
26 | 26 | import json |
27 | 27 | import os |
|
30 | 30 | from flask_cli import with_appcontext |
31 | 31 | from invenio_db import db |
32 | 32 | from invenio_jsonschemas.errors import JSONSchemaNotFound |
| 33 | +from invenio_search import current_search_client |
33 | 34 | from sqlalchemy.exc import IntegrityError |
34 | 35 |
|
| 36 | +from cap.cli import MutuallyExclusiveOption |
| 37 | +from cap.modules.deposit.errors import DepositValidationError |
35 | 38 | from cap.modules.fixtures.cli import fixtures |
| 39 | +from cap.modules.records.api import CAPRecord |
| 40 | +from cap.modules.schemas.models import Schema |
| 41 | +from cap.modules.schemas.resolvers import resolve_schema_by_url,\ |
| 42 | + resolve_schema_by_name_and_version, schema_name_to_url |
36 | 43 |
|
37 | | -from .models import Schema |
| 44 | + |
@fixtures.command()
@click.option('--schema-url', '-u',
              cls=MutuallyExclusiveOption,
              mutually_exclusive=["ana_type", "ana_version"],
              help='The url of the schema used for validation.')
@click.option('--ana-type', '-a',
              cls=MutuallyExclusiveOption,
              mutually_exclusive=["schema_url"],
              help='The analysis type of the schema used for validation.')
@click.option('--ana-version', '-v',
              help='The analysis version of the records.')
@click.option('--compare-with', '-c',
              help='The schema version, that the '
                   'records should be compared to.')
@click.option('--status', '-s',
              default='draft',
              type=click.Choice(['draft', 'published'], case_sensitive=False),
              help='The metadata type that will be used for validation.')
@click.option('--export', '-e',
              type=click.Path(),
              help='A file where, the validation errors can be saved.')
@with_appcontext
def validate(schema_url, ana_type, ana_version, compare_with, status, export):
    """Validate deposit or record metadata based on their schema.

    Provide the schema url OR the ana-type and version, as well as the schema
    version that you want to compare the records you get, to.

    If you do not provide an ana-version, it will get the latest. If you do
    not provide a -c parameter, the records will compare the data to their
    own schema.

    E.g.

    \b
    cap fixtures validate -u https://analysispreservation.cern.ch/schemas/deposits/records/test-v2.0.0.json -c 1.0.0  # noqa
    cap fixtures validate -a test -c 1.0.0
    """
    # NOTE: the docstring above is what click prints for ``--help``; the
    # ``\b`` marker keeps the example commands from being re-wrapped.

    # Resolve the schema whose records are going to be validated.
    try:
        if schema_url:
            schema = resolve_schema_by_url(schema_url)
        elif ana_type:
            schema = resolve_schema_by_name_and_version(ana_type, ana_version)
        else:
            raise click.UsageError(
                'You need to provide the ana-type or the schema-url.')
    except JSONSchemaNotFound:
        raise click.UsageError(
            'Schema not found.')
    except ValueError:
        raise click.UsageError(
            'Version has to be passed as string <major>.<minor>.<patch>.')

    # differentiate between drafts/published
    # (imported here rather than at module level, as in the original code)
    from cap.modules.deposit.api import CAPDeposit
    if status == 'draft':
        search_path = 'deposits-records'
        cap_record_class = CAPDeposit
    else:
        search_path = 'records'
        cap_record_class = CAPRecord

    # get all the records for this specific schema/type combination
    max_records = 5000
    current_schema_url = schema_name_to_url(schema.name, schema.version)
    records = current_search_client.search(
        search_path,
        q=f'_deposit.status: {status} AND '
          f'$schema: "{current_schema_url}"',
        size=max_records
    )['hits']['hits']
    pids = [rec['_id'] for rec in records]

    click.secho(
        f'{len(records)} record(s) of {schema.name} found.\n', fg='green')

    # The search returns at most ``max_records`` hits, so reaching that
    # number means some records may not have been fetched.
    if len(records) >= max_records:
        click.secho(
            f'The search is limited to {max_records} records, '
            'some records may not have been validated.\n', fg='yellow')

    # The url of the schema version used for validation does not depend on
    # the record, so it is resolved once, before the loop.
    compare_schema_url = (
        schema_name_to_url(schema.name, compare_with)
        if compare_with else None
    )

    total_errors = []
    for pid in pids:
        cap_record = cap_record_class.get_record(pid)

        # Point the record to the schema version it should be checked
        # against; nothing in this command saves the change afterwards.
        if compare_schema_url:
            cap_record['$schema'] = compare_schema_url

        try:
            cap_record.validate()
        except DepositValidationError as exc:
            error_list = '\n'.join(str(err.res) for err in exc.errors)
            msg = f'Errors in {pid}:\n{error_list}'
            click.secho(msg, fg='red')
            total_errors.append(msg)
        else:
            click.secho(f'No errors found in record {pid}', fg='green')

    # export the errors in a file
    if export:
        # The file is always (re)written, so that a report from a previous
        # run is never mistaken for the result of this one.
        with open(export, 'w', encoding='utf-8') as out:
            out.write('\n\n'.join(total_errors))

        if total_errors:
            click.secho(f'Errors saved at {export}.', fg='red')
        else:
            click.secho(
                f'No validation errors to save at {export}.', fg='green')
38 | 140 |
|
39 | 141 |
|
40 | 142 | @fixtures.command() |
|
0 commit comments