diff --git a/.github/workflows/check-schemas.yml b/.github/workflows/check-schemas.yml new file mode 100644 index 0000000..675bbf9 --- /dev/null +++ b/.github/workflows/check-schemas.yml @@ -0,0 +1,24 @@ +name: Check Schemas + +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + check-schemas: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install jsonschema + run: pip install jsonschema + - name: Run check_schemas.py + run: | + python tools/check_schemas.py diff --git a/faa/example.json b/faa/example.json new file mode 100644 index 0000000..bce80fe --- /dev/null +++ b/faa/example.json @@ -0,0 +1,24 @@ +{ + "v": "ACDC10JSON00044b_", + "d": "ED_jB_CgaoUrmd7d9Ln3gcq0ArsvOCs0EkGJpH_ubnhU", + "i": "EC4SuEyzrRwu3FWFrK0Ubd9xejlo5bUwAtGcbBGUk2nL", + "ri": "EM2YZ78SKE8eO4W1lQOJeer5xKZqLmJV7SPr3Ji5DMBZ", + "s": "EF1f9-7xwdba4L3b2OsnhUMWb02NyXJAYJ34RrAFkHs8", + "a": { + "d": "EFM0s2YlM32LAh0h77tvkz3OR6AaYmHcVchm9oVNkvbI", + "u": "0ADY50r_UJ3OaDC9ww7UcwXM", + "dt": "2025-03-01T14:22:00.000000+00:00", + "art_digest": "EK2r6EnDXre2pecTBO8s99j4OtNaaDIhVyr7uGugDhmp", + "art_posture": "witness", + "rev_latency": 0, + "content_type": "image/jpeg", + "content_size": 2847392, + "content_location": [ + "https://evidence.example.org/artifacts/crash-scene-photo-01.jpg", + "https://backup.example.org/artifacts/crash-scene-photo-01.jpg" + ], + "filename": "crash-scene-photo-01.SAID:EK2r6EnDXre2pecTBO8s99j4OtNaaDIhVyr7uGugDhmp.jpg", + "description": "Photograph of the intersection of Main St and 5th Ave taken from the northwest corner, showing vehicle positions immediately after the collision.", + "provenance": "Captured by traffic camera AID EC4SuEyzrRwu3FWFrK0Ubd9xejlo5bUwAtGcbBGUk2nL at 2025-03-01T14:19:43Z and downloaded by the insurance adjustor at 2025-03-01T14:22:00Z." 
+ } +} \ No newline at end of file diff --git a/faa/faa.schema.json b/faa/faa.schema.json new file mode 100644 index 0000000..520a9a7 --- /dev/null +++ b/faa/faa.schema.json @@ -0,0 +1,170 @@ +{ + "$id": "ELuPsd1mylvD9iY_gH5hEJzoUd8OZWtU6J2svJwIq-TJ", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Foreign Artifact Affidavit", + "description": "An ACDC wrapper that gives arbitrary binary data a tamper-evident, cryptographic identity, and that documents key attributes. This allows it to be cited as evidence in a verifiable data graph.", + "type": "object", + "credentialType": "foreign-artifact-affidavit", + "version": "1.0.0", + "required": [ + "v", + "d", + "i", + "s", + "a" + ], + "properties": { + "v": { + "description": "Version string using ACDC conventions", + "type": "string" + }, + "d": { + "description": "SAID of this ACDC", + "type": "string", + "pattern": "^(?:[BE][A-Za-z0-9_-]{43}|0[DEFG][A-Za-z0-9_-]{86})$" + }, + "u": { + "description": "A salty nonce, present if the issuer wishes to blind the attributes of this ACDC", + "type": "string" + }, + "i": { + "description": "AID of the issuer -- the party attesting to the artifact's integrity and provenance", + "type": "string", + "pattern": "^(?:[BE][A-Za-z0-9_-]{43}|0[DEFG][A-Za-z0-9_-]{86})$" + }, + "ri": { + "description": "Registry for issuer's credential status", + "type": "string", + "pattern": "^(?:[BE][A-Za-z0-9_-]{43}|0[DEFG][A-Za-z0-9_-]{86})$" + }, + "s": { + "description": "SAID of this schema", + "type": "string", + "pattern": "^(?:[BE][A-Za-z0-9_-]{43}|0[DEFG][A-Za-z0-9_-]{86})$" + }, + "a": { + "oneOf": [ + { + "description": "SAID of attributes block (compact form)", + "type": "string" + }, + { + "description": "Attributes block", + "$id": "EHhP6un6a3BqmyTJbCJ4TBz4cctdDgKc9TAi7-cEUrDJ", + "type": "object", + "required": [ + "d", + "art_digest", + "art_posture" + ], + "properties": { + "d": { + "description": "SAID of attributes block", + "type": "string", + "pattern": "^(?:[BE][A-Za-z0-9_-]{43}|0[DEFG][A-Za-z0-9_-]{86})$" + }, + "u": 
{ + "description": "A salty nonce", + "type": "string" + }, + "dt": { + "description": "Issuance datetime, ISO 8601", + "type": "string", + "format": "date-time" + }, + "art_digest": { + "description": "A CESR-encoded cryptographic hash of the artifact. SHOULD be a SAID; see https://doi.org/10.2139/ssrn.6128466 for saidification options. MAY be a raw CESR hash if saidification is impractical.", + "type": "string", + "pattern": "^(?:[EFGHI][A-Za-z0-9_-]{43}|0[DEFG][A-Za-z0-9_-]{86})$", + "examples": [ + "EK2r6EnDXre2pecTBO8s99j4OtNaaDIhVyr7uGugDhmp" + ] + }, + "art_posture": { + "description": "Posture of the issuer with respect to the artifact. Must be one of: 'record' (record the artifact's identifier without necessarily seeing the artifact), 'witness' (witness the artifact and compute art_digest from it), or 'verify' (verify the artifact according to its native rules).", + "type": "string", + "enum": [ + "record", + "witness", + "verify" + ] + }, + "rev_latency": { + "description": "Assertion that the issuer makes about how quickly it will revoke this ACDC if its corresponding artifact is revoked. A non-negative integer measuring seconds of elapsed time between the artifact revocation and its subsequent ACDC revocation. The most common value is 0, meaning the artifact has no revocation semantics, and/or the issuer takes no responsibility for keeping the ACDC's revocation aligned with the artifact's. When the artifact is a foreign credential type, and the FAA issuer acts as a bridge, this field allows an indirect revocation test of the foreign value, if the issuer is trusted.", + "type": "integer", + "minimum": 0 + }, + "content_type": { + "description": "MIME type of the artifact, per IANA media type registry. 
Although 'application/octet-stream' is valid, overly generic types are discouraged because they limit low-context data processing.", + "type": "string", + "pattern": "^[a-z]+/[a-z0-9][a-z0-9._+-]*$", + "examples": [ + "image/jpeg", + "application/pdf", + "audio/mpeg", + "application/octet-stream", + "text/plain" + ] + }, + "content_size": { + "description": "Size of the artifact in bytes. Allows a recipient to anticipate transfer costs before retrieving the artifact.", + "type": "integer", + "minimum": 0 + }, + "content_location": { + "description": "One or more URIs from which the artifact may be retrieved. Multiple URIs may reflect redundant hosting, different transport protocols, or different access tiers (e.g., a public CDN and an authenticated endpoint). The presence of this field does not guarantee availability; it is informational. However, use of data: URIs is supported and allows direct embedding of the content.", + "oneOf": [ + { + "type": "string", + "format": "uri" + }, + { + "type": "array", + "minItems": 1, + "uniqueItems": true, + "items": { + "type": "string", + "format": "uri" + } + } + ] + }, + "filename": { + "description": "The original filename of the artifact at the time of wrapping. For artifacts using the externalized SAID algorithm (xSAID), this field SHOULD reflect the saidified filename including the embedded SAID.", + "type": "string" + }, + "description": { + "description": "A human-readable description of the artifact's content or significance. Not intended for machine interpretation.", + "type": "string" + }, + "provenance": { + "description": "A human-readable statement of how the artifact was obtained or created \u2014 for example, 'Captured by device AID EX... at 2025-03-01T09:00:00Z', 'Downloaded from https://...', or 'Received via email from ...'. 
For machine-verifiable provenance, use an ACDC edge instead.", + "type": "string" + } + }, + "additionalProperties": true + } + ] + }, + "r": { + "oneOf": [ + { + "description": "Rules section SAID (compact form)", + "type": "string" + }, + { + "description": "Rules detail", + "type": "object", + "properties": { + "d": { + "description": "SAID of rules block", + "type": "string" + } + }, + "additionalProperties": true + } + ] + } + }, + "additionalProperties": true +} \ No newline at end of file diff --git a/faa/index.md b/faa/index.md new file mode 100644 index 0000000..a3e226a --- /dev/null +++ b/faa/index.md @@ -0,0 +1,44 @@ +## Foreign Artifact Affidavit (FAA) + +### Purpose + +A Foreign Artifact Affidavit (FAA, pronounced `/fa/` like the musical syllable) is a wrapper that gives non-ACDC data the attributes that are needed to participate fully in an ACDC data graph. It is a simple but powerful interoperability mechanism. + +Modern life is full of credential and credential-like data types: X509 certs, W3C VCs, SD-JWTs, AnonCreds, ISO mDOC/mDL, remote attestations from a secure enclave, a variety of tokens, and various flavors of signed PDF are digital examples; birth certificates, passports, and citizen ID cards are physical examples. Instances of these artifacts have various ways to identify themselves and to point to parties that have a role relative to them -- but we may still want them to participate in standard ACDC data graphs. ([Verifiable Voice Protocol](https://datatracker.ietf.org/doc/draft-hardman-verifiable-voice-protocol/) is an example of an evidence-dependent industry mechanism that explicitly contemplates such integration.) 
+ +In addition, we know of other, arbitrary forms of data that often need to be formally referenced as evidence, but that are not credential-like at all: spreadsheets, documents, photographs, audio and video recordings, sensor readings, fingerprints, videos, musical scores, G-codes, blood-glucose measurements, courtroom transcriptions. And non-digital embodiments are also important here: soil samples, biopsies, genomes, analog recordings. It is not clear how to cite arbitrary data in an ACDC. + +A FAA places non-ACDC data in an efficient, tamper-evident envelope with predictable metadata. This lets the ACDC ecosystem cite it, reason about it, and verify some of its properties in standard ways. + +### Physical to digital + +Specifying how to digitize data is out of scope, but we make the simplifying assumption that digitization will occur in some way that's meaningful to ecosystem participants. X-rays and ultrasounds turn into DICOM images, genomes turn into FASTQ or VCF, dental impressions turn into STL, analog signals from a radiotelescope turn into FITS, the bumper of a crashed car turns into photos at the accident scene, and so forth. + +### Arbitrary digital to tamper-evident digital + +Once a reference version of the data is available digitally, we need a way to refer to it. This reference must be tamper-evident (at least to the fidelity of the sample resolution, if quantized). + +ACDCs use hashes of the full content of the data for this purpose. If data lends itself to standard *saidification*, the SAID of the data SHOULD identify it. Otherwise, the [bytewise or externalized SAID algorithms](https://doi.org/10.2139/ssrn.6128466) SHOULD be applied to the data, or the data MAY simply be hashed. In all cases, the resulting digest is encoded as CESR, and in this form becomes the tamper-evident, self-describing way to reference the digital data it derives from. This CESR-encoded digest is stored in the `art_digest` field in the FAA schema. 
The data identified by this digest is called the *artifact* of the FAA. + +### What belongs in this credential + +In and of itself, a FAA makes no claim about the *meaning* or *significance* of its artifact. That interpretation is left to the ACDC (e.g., a [dossier](https://trustoverip.github.io/kswg-dossier-specification) that cites it) and to any governance framework that provides context. The FAA attests to: + +- The artifact's cryptographic identity (`art_digest`) +- Optional metadata about the artifact: content type, size, location, filename, a human-readable description, a + description of provenance, issuance date +- The issuer's posture with respect to the artifact (`art_posture`): did the issuer merely record `art_digest` at the request of a third party, or witness the artifact directly and compute `art_digest` from it, or actually verify the integrity of the artifact at a particular moment, according to that artifact's native rules? +- The issuer's assertion with respect to revocation of the artifact (`rev_latency`): if the artifact is revoked, does the issuer of the FAA attempt a corresponding revocation of this ACDC -- and if so, how quickly? (Normally, the issuer of an FAA makes no attempt to react to revocation events of its corresponding artifact, either because the artifact has no revocation semantics, or because tracking revocation is a burden on an issuer that's just trying to create a lightweight affidavit. However, if the FAA is issued by an entity that's actively trying to bridge between another credential ecosystem and ACDCs, a non-zero value here allows verifiers to treat the FAA as a proxy for the foreign credential, assuming they trust the FAA issuer.) + +### Schema + +See [faa.schema.json](faa.schema.json). + +### Example + +The example ([example.json](example.json)) shows a FAA wrapping a JPEG photograph of a traffic accident scene. The `art_digest` field holds an xSAID, reflected also in the `filename` field. 
The `provenance` field records the human-readable chain of custody from the traffic camera to the insurance adjustor. + +### Extension + +Implementers MAY define additional fields in the `a` section beyond those +defined in this schema. \ No newline at end of file diff --git a/registry.json b/registry.json index e833eaa..14f952d 100644 --- a/registry.json +++ b/registry.json @@ -1,29 +1,30 @@ { - "EDH3Q0MW6oCcwyYw2MN39n1YfPs37o1QEv86kB-fBzmh": "citation/citation.schema.json", "EK3YbEFp3zUuHPsGrLRzwr6zWBXcOFJNTTBqezXd_2yf": "a2p-campaign/a2p-campaign.schema.json", - "EBxJHMk6MOEUogB6A1rP5x9te7DscPfxFfUGJCkq1Lq5": "award/award.schema.json", + "EKR1nFV0JvRhc4xkhzlZJfpN_taaspipcZWWSLj37Fdk": "aegis-std-vetting/aegis-std-vetting.schema.json", + "EPhWFgeOy8g7yRy-Xtyvbdieqvl_3YVXNHMgTEZuJOWh": "ai-coder/ai-coder.schema.json", "EBCnd7qk82wLBOgFukdmsdkksAuPpmzt5-eg9YKWWP3j": "ai-user-coca/ai-user-coca.schema.json", - "EFvnoHDY7I-kaBBeKlbDbkjG4BaI0nKLGadxBdjMGgSQ": "tn-alloc/tn-alloc.schema.json", + "EJxFPpyDRV-W6O2Vtjdy2K90ltWmQK8l1jePw5YOo_Ft": "attestation/attestation.schema.json", + "EBxJHMk6MOEUogB6A1rP5x9te7DscPfxFfUGJCkq1Lq5": "award/award.schema.json", + "EFvHYHX0cUx9sdjxZOr9fpPcQKdzRNFH42D8R29p7lAH": "bindkey/bindkey.schema.json", + "EBpGNZSWwj-btOJMJSMLCVoXbtKdJTcggO-zMevr4vH_": "brand-owner/brand-owner.schema.json", + "EDH3Q0MW6oCcwyYw2MN39n1YfPs37o1QEv86kB-fBzmh": "citation/citation.schema.json", + "ELuPsd1mylvD9iY_gH5hEJzoUd8OZWtU6J2svJwIq-TJ": "faa/faa.schema.json", "EKrv2S0OVc8SeKCzIAOSE-y4j5ybLOOgB69y12Lzxh6Y": "face-to-face/face-to-face.schema.json", - "EPhWFgeOy8g7yRy-Xtyvbdieqvl_3YVXNHMgTEZuJOWh": "ai-coder/ai-coder.schema.json", "EL7irIKYJL9Io0hhKSGWI4OznhwC7qgJG5Qf4aEs6j0o": "gcd/gcd.schema.json", - "EH1jN4U4LMYHmPVI4FYdZ10bIPR7YWKp8TDdZ9Y9Al-P": "vvp-dossier/vvp-dossier.schema.json", - "EAoRVmgPyacjhUxaV0nPwiuUuHMjKDpNZrj7ClofZ-3Z": "ovc-brand-owner/ovc-brand-owner.schema.json", "EJvwY9n7EsJ4ZejUBHFrnrNammC8BkGI9YaW1Wnp5c22": 
"org-vet/org-vet.schema.json", - "EFvHYHX0cUx9sdjxZOr9fpPcQKdzRNFH42D8R29p7lAH": "bindkey/bindkey.schema.json", + "EAoRVmgPyacjhUxaV0nPwiuUuHMjKDpNZrj7ClofZ-3Z": "ovc-brand-owner/ovc-brand-owner.schema.json", "EHFdm3U_4nML6lo-q_xDTO8183hC9HlWif2l4ycNo8TW": "ovc-org-vet/ovc-org-vet.schema.json", - "EBpGNZSWwj-btOJMJSMLCVoXbtKdJTcggO-zMevr4vH_": "brand-owner/brand-owner.schema.json", - "EJxFPpyDRV-W6O2Vtjdy2K90ltWmQK8l1jePw5YOo_Ft": "attestation/attestation.schema.json", - "EPy_7LE3tVdl8qEKN5i4L8eAgIM-1I51-DNiewmcq-fe": "tcr-vetting/tcr-vetting.schema.json", "EG68irpfVX667KCLwG85Cn1Mp3sCe38ftARyQJrxP2kF": "proof-of-control/proof-of-control.schema.json", - "EKR1nFV0JvRhc4xkhzlZJfpN_taaspipcZWWSLj37Fdk": "aegis-std-vetting/aegis-std-vetting.schema.json", + "EPy_7LE3tVdl8qEKN5i4L8eAgIM-1I51-DNiewmcq-fe": "tcr-vetting/tcr-vetting.schema.json", + "EFvnoHDY7I-kaBBeKlbDbkjG4BaI0nKLGadxBdjMGgSQ": "tn-alloc/tn-alloc.schema.json", "EGEebb1pVRcZ6OXHlYitl5DNh-LDrMWPwRtstiKiDhRy": "tn/tn.schema.json", + "EH6ekLjSr8V32WyFbGe1zXjTzFs9PkTYmupJ9H65O14g": "vLEI/acdc/ecr-authorization-vlei-credential.json", + "EEy9PkikFcANV1l7EHukCeXqrzT1hNZjGlUk7wuMO5jw": "vLEI/acdc/legal-entity-engagement-context-role-vLEI-credential.json", "EBNaNu-M9P5cgrnfl2Fvymy4E_jvxxyjb70PRtiANlJy": "vLEI/acdc/legal-entity-official-organizational-role-vLEI-credential.json", - "EMhvwOlyEJ9kN4PrwCpr9Jsv7TxPhiYveZ0oP3lJzdEi": "vLEI/acdc/verifiable-ixbrl-report-attestation.json", + "ENPXp1vQzRF6JwIuS-mp2U8Uf1MoADoP_GqQ62VsDZWY": "vLEI/acdc/legal-entity-vLEI-credential.json", "EKA57bKBKxr_kN7iN5i7lMUxpMG-s19dRcmov1iDxz-E": "vLEI/acdc/oor-authorization-vlei-credential.json", - "EEy9PkikFcANV1l7EHukCeXqrzT1hNZjGlUk7wuMO5jw": "vLEI/acdc/legal-entity-engagement-context-role-vLEI-credential.json", + "EBfdlu8R27Fbx-ehrqwImnK-8Cm79sqbAQ4MmvEAYqao": "vLEI/acdc/qualified-vLEI-issuer-vLEI-credential.json", + "EMhvwOlyEJ9kN4PrwCpr9Jsv7TxPhiYveZ0oP3lJzdEi": "vLEI/acdc/verifiable-ixbrl-report-attestation.json", 
"ECYorXkheU7YsXZkYLGtvBOxEZ6alS5H5FJRn0tgDXV0": "vLEI/acdc/verifiable-ixbrl-report-d6-attestation.json", - "ENPXp1vQzRF6JwIuS-mp2U8Uf1MoADoP_GqQ62VsDZWY": "vLEI/acdc/legal-entity-vLEI-credential.json", - "EH6ekLjSr8V32WyFbGe1zXjTzFs9PkTYmupJ9H65O14g": "vLEI/acdc/ecr-authorization-vlei-credential.json", - "EBfdlu8R27Fbx-ehrqwImnK-8Cm79sqbAQ4MmvEAYqao": "vLEI/acdc/qualified-vLEI-issuer-vLEI-credential.json" + "EH1jN4U4LMYHmPVI4FYdZ10bIPR7YWKp8TDdZ9Y9Al-P": "vvp-dossier/vvp-dossier.schema.json" } \ No newline at end of file diff --git a/tools/.ignore_check_schemas b/tools/.ignore_check_schemas new file mode 100644 index 0000000..ef0c0d8 --- /dev/null +++ b/tools/.ignore_check_schemas @@ -0,0 +1,2 @@ +'vLEI' is not lower-case kabob-case +Missing schema file: .*vLEI.schema.json \ No newline at end of file diff --git a/tools/check_schemas.py b/tools/check_schemas.py new file mode 100644 index 0000000..c9dfd21 --- /dev/null +++ b/tools/check_schemas.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +import sys +import os +import re +import json +from jsonschema import Draft202012Validator, exceptions as jsonschema_exceptions + +IGNORE_FILE = os.path.join(os.path.dirname(__file__), ".ignore_check_schemas") +error_count = 0 +ignore_regexes = [] +folders = [] +schemas_as_json = [] # List of (filename, jsonobj) + +def load_ignore_regexes(): + global ignore_regexes + if os.path.exists(IGNORE_FILE): + with open(IGNORE_FILE, "r") as f: + ignore_regexes = [re.compile(line.strip()) for line in f if line.strip()] + else: + ignore_regexes = [] + +def report_error(msg): + global error_count + for regex in ignore_regexes: + if regex.search(msg): + return # Suppress error + print(msg, file=sys.stderr) + error_count += 1 + +def check_nothing(): + # Placeholder for a real check + pass + +# Helper to get repo root +def get_repo_root(): + return os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) + +def populate_folders(): + global folders + repo_root = get_repo_root() + exclude = 
{"tools", "venv", "env"} + for name in os.listdir(repo_root): + path = os.path.join(repo_root, name) + if os.path.isdir(path) and not name.startswith(".") and name not in exclude: + folders.append(name) + +def check_folder_names(): + kabob_case_re = re.compile(r'^[a-z0-9]+(-[a-z0-9]+)*$') + for folder in folders: + if not kabob_case_re.match(folder): + report_error(f"Folder name '{folder}' is not lower-case kabob-case.") + +def check_0_schema_basics(): + global schemas_as_json + repo_root = get_repo_root() + for folder in folders: + schema_file = os.path.join(repo_root, folder, f"{folder}.schema.json") + if not os.path.isfile(schema_file): + report_error(f"Missing schema file: {schema_file}") + continue + # Check valid JSON + try: + with open(schema_file, "r") as f: + schema = json.load(f) + schemas_as_json.append((schema_file, schema)) + except Exception as e: + report_error(f"Invalid JSON in {schema_file}: {e}") + continue + # Check valid JSON Schema + try: + Draft202012Validator.check_schema(schema) + except jsonschema_exceptions.SchemaError as e: + report_error(f"Invalid JSON Schema in {schema_file}: {e}") + +def check_json_fragments(): + repo_root = get_repo_root() + for folder in folders: + folder_path = os.path.join(repo_root, folder) + for fname in os.listdir(folder_path): + if fname.endswith(".json") and not fname.endswith(".schema.json"): + fpath = os.path.join(folder_path, fname) + try: + with open(fpath, "r") as f: + json.load(f) + except Exception as e: + report_error(f"Invalid JSON in {fpath}: {e}") + +def check_fields(): + pattern_re = r'^(?:[BE][A-Za-z0-9_-]{43}|0[DEFG][A-Za-z0-9_-]{86})$' + dt_regex = re.compile(r'.*\Wdt$') + field_type_map = {} + a_field_type_map = {} + for filename, schema in schemas_as_json: + props = schema.get('properties', {}) + for field, spec in props.items(): + ftype = spec.get('type') + # Error if no type declared + if ftype is None: + report_error(f"In {filename}: Field '{field}' has no declared type.") + ftype = 
'unknown' + field_type_map.setdefault(field, set()).add(ftype) + # Check rules for d, i, s, ri + if field in {"d", "i", "s", "ri"}: + if ftype != "string": + report_error(f"In {filename}: Field '{field}' should be type string, got {ftype}.") + pattern = spec.get('pattern') + if pattern != pattern_re: + report_error(f"In {filename}: Field '{field}' should have pattern '{pattern_re}', got '{pattern}'.") + # Check rule for dt and any field matching .*\Wdt$ + if field == "dt" or dt_regex.match(field): + if ftype != "string": + report_error(f"In {filename}: Field '{field}' should be type string, got {ftype}.") + fmt = spec.get('format') + if fmt != "date-time": + report_error(f"In {filename}: Field '{field}' should have format 'date-time', got '{fmt}'.") + # Also check fields inside /properties/a/properties + a_spec = props.get('a') + if a_spec: + a_type = a_spec.get('type') + if a_type != 'object' and 'oneOf' not in a_spec: + report_error(f"In {filename}: Field 'a' should be type object, got {a_type}.") + # Handle oneOf or direct object + a_props = None + if a_type == 'object' and 'properties' in a_spec: + a_props = a_spec['properties'] + elif 'oneOf' in a_spec: + for option in a_spec['oneOf']: + if option.get('type') == 'object' and 'properties' in option: + a_props = option['properties'] + break + if a_props: + for field, spec in a_props.items(): + ftype = spec.get('type') + if ftype is None: + report_error(f"In {filename} (/a/properties): Field '{field}' has no declared type.") + ftype = 'unknown' + a_field_type_map.setdefault(field, set()).add(ftype) + # Check rule for dt and any field matching .*\Wdt$ + if field == "dt" or dt_regex.match(field): + if ftype != "string": + report_error(f"In {filename} (/a/properties): Field '{field}' should be type string, got {ftype}.") + fmt = spec.get('format') + if fmt != "date-time": + report_error(f"In {filename} (/a/properties): Field '{field}' should have format 'date-time', got '{fmt}'.") + # Output field name -> data types pairs for 
user + print("Root field name -> data types:", file=sys.stderr) + for field, types in field_type_map.items(): + print(f" {field}: {', '.join(types)}", file=sys.stderr) + print("a/properties field name -> data types:", file=sys.stderr) + for field, types in a_field_type_map.items(): + print(f" {field}: {', '.join(types)}", file=sys.stderr) + +def main(): + # Set working dir to repo root + os.chdir(get_repo_root()) + load_ignore_regexes() + populate_folders() + # Find all check_* functions + check_funcs = [v for k, v in sorted(globals().items()) if k.startswith("check_") and callable(v)] + for func in check_funcs: + func() + if error_count > 0: + sys.exit(1) + sys.exit(0) + +if __name__ == "__main__": + main() diff --git a/tools/cli/register_all.py b/tools/cli/register_all.py index 0d4e1d3..cf58a60 100644 --- a/tools/cli/register_all.py +++ b/tools/cli/register_all.py @@ -56,8 +56,9 @@ def register_all(args): id = get_id(schema_file) schemas[id] = f"{vlei_dir}/{item}" + sorted_schemas = dict(sorted(schemas.items(), key=lambda item: item[1])) with open('registry.json', 'w') as f: - json.dump(schemas, f, indent=2) + json.dump(sorted_schemas, f, indent=2) def main(): args = parser.parse_args() diff --git a/tools/requirements.txt b/tools/requirements.txt index 1bb8278..9dfab6c 100644 --- a/tools/requirements.txt +++ b/tools/requirements.txt @@ -8,4 +8,5 @@ --index-url https://pypi.org/simple/ # pypi base pip index or local pip index --editable . # install as editable +jsonschema